├── .editorconfig ├── .github └── workflows │ └── run_test_case.yaml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── doc ├── logo.jpg ├── replicant-fsm.png ├── replication-msc.png ├── rlog_internals.md └── src │ ├── replicant-fsm.uml │ └── replication-msc.uml ├── include └── mria.hrl ├── rebar.config ├── rebar.config.script ├── scripts └── build-old-rel ├── src ├── mria.app.src ├── mria.appup.src ├── mria.erl ├── mria_app.erl ├── mria_autoclean.erl ├── mria_autoheal.erl ├── mria_bootstrapper.erl ├── mria_config.erl ├── mria_core_shard_sup.erl ├── mria_guid.erl ├── mria_lb.erl ├── mria_lib.erl ├── mria_membership.erl ├── mria_membership_sup.erl ├── mria_mnesia.erl ├── mria_mnesia_null_storage.erl ├── mria_node.erl ├── mria_node_monitor.erl ├── mria_rebalance.erl ├── mria_replica_importer_worker.erl ├── mria_replicant_shard_sup.erl ├── mria_rlog.erl ├── mria_rlog.hrl ├── mria_rlog_agent.erl ├── mria_rlog_replica.erl ├── mria_rlog_server.erl ├── mria_rlog_sup.erl ├── mria_schema.erl ├── mria_shards_sup.erl ├── mria_status.erl ├── mria_sup.erl └── mria_upstream.erl └── test ├── concuerror_tests.erl ├── mria_SUITE.erl ├── mria_autoclean_SUITE.erl ├── mria_autoheal_SUITE.erl ├── mria_compatibility_suite.erl ├── mria_ct.erl ├── mria_fault_tolerance_suite.erl ├── mria_helper_tab.erl ├── mria_lb_SUITE.erl ├── mria_membership_SUITE.erl ├── mria_mnesia_SUITE.erl ├── mria_mnesia_SUITE_data └── cluster_benchmark │ ├── benchmark.sh │ ├── latency_graph.gp │ ├── nemesis.sh │ └── slowdown.sh ├── mria_mnesia_test_util.erl ├── mria_node_monitor_SUITE.erl ├── mria_proper_mixed_cluster_suite.erl ├── mria_proper_suite.erl ├── mria_proper_utils.erl ├── mria_proper_utils.hrl ├── mria_rlog_props.erl ├── mria_rlog_tests.erl └── mria_transaction_gen.erl /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | trim_trailing_whitespace = true 7 | insert_final_newline = 
true 8 | 9 | [*.{erl, src, hrl}] 10 | indent_style = space 11 | indent_size = 4 12 | 13 | [Makefile] 14 | indent_style = tab 15 | -------------------------------------------------------------------------------- /.github/workflows/run_test_case.yaml: -------------------------------------------------------------------------------- 1 | name: Run test case 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | 7 | run_test_case: 8 | runs-on: ubuntu-latest 9 | 10 | container: ghcr.io/emqx/emqx-builder/5.3-5:1.15.7-26.2.1-2-ubuntu24.04 11 | 12 | steps: 13 | - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 14 | 15 | - name: Install prerequisites 16 | run: | 17 | apt update 18 | apt install -y cmake 19 | 20 | - name: Configure git 21 | run: | 22 | git config --global --add safe.directory "*" 23 | 24 | - name: Compile 25 | run: | 26 | make 27 | 28 | - name: Concuerror tests 29 | run : | 30 | make concuerror_test 31 | 32 | - name: Smoke test 33 | run: | 34 | make smoke-test 35 | 36 | - name: Fault-tolerance tests 37 | run: | 38 | make ct-fault-tolerance 39 | 40 | - name: Consistency tests 41 | run: | 42 | make ct-consistency 43 | 44 | - name: Coveralls 45 | env: 46 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 47 | run: | 48 | make coveralls 49 | 50 | - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 51 | if: always() 52 | with: 53 | name: logs 54 | path: _build/test/logs 55 | 56 | - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 57 | with: 58 | name: cover 59 | path: _build/test/cover 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eunit 2 | deps 3 | *.o 4 | *.beam 5 | *.plt 6 | *.swp 7 | erl_crash.dump 8 | ebin/* 9 | rel/example_project 10 | .concrete/DEV_MODE 11 | .rebar 12 | .erlang.mk/ 13 | !data/app.etcd.config 14 | ekka.d 15 | Mnesia* 16 | logs/ 17 | 
.DS_Store 18 | cover/ 19 | ct.coverdata 20 | ebin/test 21 | eunit.coverdata 22 | test/ct.cover.spec 23 | log/ 24 | .erlang.mk/ 25 | erlang.mk 26 | ekka.d 27 | _build/ 28 | .rebar3/ 29 | rebar.lock 30 | TAGS 31 | _checkouts/ 32 | TEST-*.xml 33 | concuerror_report.txt 34 | snabbkaffe/ 35 | rebar3.crashdump 36 | .#* 37 | *~ 38 | *# 39 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BUILD_DIR := $(CURDIR)/_build 2 | 3 | REBAR := rebar3 4 | 5 | CT_READABLE ?= false 6 | 7 | compile: 8 | $(REBAR) do compile, dialyzer, xref 9 | 10 | .PHONY: all 11 | all: compile test 12 | 13 | .PHONY: clean 14 | clean: distclean 15 | 16 | .PHONY: distclean 17 | distclean: 18 | @rm -rf _build erl_crash.dump rebar3.crashdump rebar.lock 19 | 20 | .PHONY: xref 21 | xref: 22 | $(REBAR) xref 23 | 24 | .PHONY: eunit 25 | eunit: compile 26 | $(REBAR) eunit verbose=true 27 | 28 | .PHONY: test 29 | test: smoke-test ct-consistency ct-fault-tolerance cover 30 | 31 | .PHONY: smoke-test 32 | smoke-test: 33 | $(REBAR) do eunit, ct -v --cover --readable=$(CT_READABLE) 34 | $(REBAR) ct --readable=$(CT_READABLE) -v --suite mria_compatibility_suite 35 | 36 | .PHONY: ct-consistency 37 | ct-consistency: 38 | $(REBAR) ct --cover -v --readable=$(CT_READABLE) --suite mria_proper_suite,mria_proper_mixed_cluster_suite 39 | 40 | .PHONY: ct-fault-tolerance 41 | ct-fault-tolerance: 42 | $(REBAR) ct --cover -v --readable=$(CT_READABLE) --suite mria_fault_tolerance_suite 43 | 44 | .PHONY: ct-suite 45 | ct-suite: compile 46 | ifneq ($(TESTCASE),) 47 | $(REBAR) ct -v --readable=$(CT_READABLE) --suite $(SUITE) --case $(TESTCASE) 48 | else 49 | $(REBAR) ct -v --readable=$(CT_READABLE) --suite $(SUITE) 50 | endif 51 | 52 | cover: | smoke-test ct-consistency ct-fault-tolerance 53 | $(REBAR) cover 54 | 55 | .PHONY: coveralls 56 | coveralls: 57 | @rebar3 as test coveralls send 58 | 59 | .PHONY: 
dialyzer 60 | dialyzer: 61 | $(REBAR) dialyzer 62 | 63 | CUTTLEFISH_SCRIPT = _build/default/lib/cuttlefish/cuttlefish 64 | 65 | $(CUTTLEFISH_SCRIPT): 66 | @${REBAR} get-deps 67 | @if [ ! -f cuttlefish ]; then make -C _build/default/lib/cuttlefish; fi 68 | 69 | app.config: $(CUTTLEFISH_SCRIPT) 70 | $(verbose) $(CUTTLEFISH_SCRIPT) -l info -e etc/ -c etc/mria.conf.example -i priv/mria.schema -d data/ 71 | 72 | ########################################################################################## 73 | # Concuerror 74 | ########################################################################################## 75 | 76 | CONCUERROR := $(BUILD_DIR)/Concuerror/bin/concuerror 77 | CONCUERROR_RUN := $(CONCUERROR) \ 78 | --treat_as_normal shutdown --treat_as_normal normal --treat_as_normal intentional \ 79 | --treat_as_normal optvar_set --treat_as_normal optvar_stopped --treat_as_normal optvar_retry \ 80 | -x code -x code_server -x error_handler \ 81 | -pa $(BUILD_DIR)/concuerror+test/lib/snabbkaffe/ebin \ 82 | -pa $(BUILD_DIR)/concuerror+test/lib/optvar/ebin \ 83 | -pa $(BUILD_DIR)/concuerror+test/lib/mria/ebin 84 | 85 | concuerror = $(CONCUERROR_RUN) -f $(BUILD_DIR)/concuerror+test/lib/mria/test/concuerror_tests.beam -t $(1) || \ 86 | { cat concuerror_report.txt; exit 1; } 87 | 88 | .PHONY: concuerror_test 89 | concuerror_test: $(CONCUERROR) 90 | rebar3 as concuerror eunit -m concuerror_tests 91 | # $(call concuerror,wait_for_shards_crash_test) 92 | $(call concuerror,notify_different_tags_test) 93 | $(call concuerror,get_core_node_test) 94 | $(call concuerror,dirty_bootstrap_test) 95 | $(call concuerror,wait_for_shards_timeout_test) 96 | 97 | 98 | $(CONCUERROR): 99 | mkdir -p _build/ 100 | cd _build && git clone https://github.com/parapluu/Concuerror.git 101 | $(MAKE) -C _build/Concuerror/ 102 | 103 | ########################################################################################## 104 | # Docs 105 | 
########################################################################################## 106 | DOC_DIR=doc 107 | DOC_SRC_DIR=$(DOC_DIR)/src 108 | 109 | UMLS=$(wildcard $(DOC_SRC_DIR)/*.uml) 110 | PICS=$(UMLS:$(DOC_SRC_DIR)/%.uml=$(DOC_DIR)/%.png) 111 | 112 | .PHONY: doc 113 | doc: $(PICS) 114 | 115 | $(DOC_DIR)/%.png: $(DOC_SRC_DIR)/%.uml 116 | cat $< | plantuml -pipe > $@ 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mria 2 | 3 | Mria is an extension for Mnesia database that adds eventual consistency to the cluster. 4 | 5 | ## Motivation 6 | 7 | Using Mria in RLOG mode aims to improve database write throughput in large clusters (4 nodes and more). 8 | 9 | The default unpatched mnesia has two modes of table access: 10 | 11 | * Local: when the table has a local replica. 12 | The current replication of Mnesia is based on a full-mesh, peer-to-peer Erlang distribution which does not scale well and has the risk of split-brain. 13 | Adding more replicas of the table creates more overhead for the writes. 14 | 15 | * Remote: when the table doesn't have a local replica, and the data is read via RPC call to a node that has a table copy. 16 | Network latency is orders of magnitude larger than reading the data locally. 17 | 18 | Mria aims to find the middle ground between the two approaches: data is read locally on all nodes, but only a few nodes actively participate in the transaction. 19 | This allows to improve write throughput of the cluster without sacrificing read latency, at the cost of strong consistency guarantees. 20 | 21 | ## Logo 22 | 23 | ![](doc/logo.jpg "Mriya logo") 24 | 25 | ## Modes of operation 26 | 27 | Mria works in two modes: 28 | 29 | 1. As a thin wrapper for Mnesia 30 | 1. In a so called `RLOG` mode (Replication LOG) 31 | 32 | RLOG feature is disabled by default. 
33 | It can be enabled by setting `mria.db_backend` application environment variable to `rlog`. 34 | 35 | ## Node roles 36 | 37 | When RLOG is enabled, each node assumes one of the two roles: `core` or `replicant`. 38 | The role is determined by `mria.node_role` application environment variable. 39 | The default value is `core`. 40 | Core nodes behave much like regular mnesia nodes: they are connected in a full mesh, and each node can initiate write transactions, hold locks, etc. 41 | 42 | Replicant nodes, on the other hand, don't participate in the mnesia transactions. 43 | They connect to one of the core nodes and passively replicate the transactions from it using an internal Mria protocol based on [gen_rpc](https://github.com/emqx/gen_rpc/). 44 | From the point of view of mnesia they simply don't exist: they don't appear in the `table_copies` list, they don't hold any locks and don't participate in the transaction commit protocol. 45 | 46 | This means replicant nodes aren't allowed to perform any write operations on their own. 47 | They instead perform an RPC call to a core node, which performs the write operation on their behalf. 48 | Same goes for dirty writes as well. 49 | This is decided internally by `mria:transaction` function. 50 | Conversely, dirty reads and read-only transactions run locally on the replicant. 51 | The semantics of the read operations are the following: they operate on a consistent, but potentially outdated snapshot of the data. 52 | 53 | ## Shards 54 | 55 | For performance reasons, mnesia tables are separated into disjoint subsets called RLOG shards. 56 | Transactions for each shard are replicated independently. 57 | Currently, a transaction can only modify tables in one shard. 58 | Usually it is a good idea to group all tables that belong to a particular OTP application in one shard. 59 | 60 | ## Enabling RLOG in your application 61 | 62 | It is important to make the application code compatible with the RLOG feature by using the correct APIs. 
63 | Thankfully, migration from plain mnesia to RLOG is rather simple. 64 | 65 | ### Assigning tables to the shards 66 | 67 | First, each mnesia table should be assigned to an RLOG shard. 68 | It is done by adding `{rlog_shard, shard_name}` tuple to the option list of `mria:create_table` function. 69 | 70 | For example: 71 | 72 | ```erlang 73 | -module(mria_app). 74 | 75 | -behaviour(application). 76 | 77 | -export([start/2, stop/1]). 78 | 79 | -include_lib("snabbkaffe/include/trace.hrl"). 80 | 81 | start(_Type, _Args) -> 82 | ok = mria:create_table(foo, [{type, bag}, 83 | {rlog_shard, my_shard}, 84 | {storage, ram_copies} 85 | ]), 86 | ok = mria:create_table(bar, [{type, ordered_set}, 87 | {storage, ram_copies}, 88 | {rlog_shard, my_shard} 89 | ]), 90 | mria:wait_for_tables([foo, bar]). 91 | ``` 92 | 93 | The API for creating the table is similar to Mnesia, with three notable exceptions: 94 | 95 | 1. `create_table` function is idempotent 96 | 1. All replicas of the table use the same storage backend, as specified by `storage` parameter, and each table is replicated on all nodes in the cluster 97 | 1. There is a mandatory `rlog_shard` parameter that assigns the table to an RLOG shard. 98 | The only exception is `local_content` tables that are implicitly assigned to `undefined` shard, that is not replicated 99 | 100 | ### Waiting for shard replication 101 | 102 | Please note that replicant nodes don't connect to all the shards automatically. 103 | Connection to the upstream core node and replication of the transactions should be triggered by calling `mria:wait_for_tables(Tables, Timeout)` function. 104 | Typically one should call this function in the application start callback, as shown in the example above. 
105 | 106 | ### Write operations 107 | 108 | Use of the following `mnesia` APIs is forbidden: 109 | 110 | * `mnesia:transaction` 111 | * `mnesia:dirty_write` 112 | * `mnesia:dirty_delete` 113 | * `mnesia:dirty_delete_object` 114 | * `mnesia:clear_table` 115 | 116 | Replace them with the corresponding functions from `mria` module. 117 | 118 | Using transactional versions of the mnesia APIs for writes and deletes is fine. 119 | 120 | With that in mind, typical write transaction should look like this: 121 | 122 | ```erlang 123 | mria:transaction(my_shard, 124 | fun() -> 125 | mnesia:read(shard_tab, foo), 126 | mnesia:write(#shard_tab{key = foo, val = bar}) 127 | end) 128 | ``` 129 | 130 | ### Read operations 131 | 132 | Dirty read operations (such as `mnesia:dirty_read`, `ets:lookup` and `mnesia:dirty_select`) are allowed. 133 | However, it is recommended to wrap all reads in `mria:ro_transaction` function. 134 | Under normal conditions (when all shards are in sync) it should not introduce extra overhead. 135 | 136 | ## Callbacks 137 | 138 | Mria can execute callbacks on some system events. 139 | They can be registered using `mria:register_callback/2` function. 140 | 141 | - `stop`: This callback is executed when the DB stops or restarts. 142 | - `start`: This callback is executed when the DB starts or restarts. 143 | 144 | Note that the DB restarts when the node joins the cluster. 
145 | -------------------------------------------------------------------------------- /doc/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emqx/mria/760058d6277a6a6bc4729744a5bb5ea901868fa6/doc/logo.jpg -------------------------------------------------------------------------------- /doc/replicant-fsm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emqx/mria/760058d6277a6a6bc4729744a5bb5ea901868fa6/doc/replicant-fsm.png -------------------------------------------------------------------------------- /doc/replication-msc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/emqx/mria/760058d6277a6a6bc4729744a5bb5ea901868fa6/doc/replication-msc.png -------------------------------------------------------------------------------- /doc/rlog_internals.md: -------------------------------------------------------------------------------- 1 | # RLOG: database developer's guide 2 | 3 | ## Transaction interception 4 | 5 | We use a patched version of OTP that allows hooking into the post-commit stage of the transaction, after the data has been dumped to the storage. 6 | 7 | ## Actors 8 | 9 | ### RLOG Server 10 | 11 | RLOG server is a `gen_server` process that runs on the core node. 12 | There is an instance of this process for each shard. 13 | This process is registered with the shard's name. 14 | It is responsible for the initial communication with the RLOG replica processes, and spawning RLOG agent and RLOG bootstrapper processes. 15 | Also it receives the transaction messages intercepted by the hook and multicasts them to the agents. 16 | 17 | ### RLOG Agent 18 | 19 | RLOG agent is a `gen_statem` process that runs on the core node. 20 | This process's lifetime is tied to the lifetime of the remote RLOG replica process. 
21 | It is responsible for subscribing to the mnesia events for the shard and forwarding them to the replicant node. 22 | Each message sent by the agent is tagged with its pid. 23 | 24 | #### RLOG Replica 25 | 26 | RLOG replica is a `gen_statem` process that runs on the replicant node. 27 | It spawns during the node startup under the `rlog` supervisor, and is restarted indefinitely. 28 | It talks to the RLOG server in its `post_init` callback, and establishes connection to the remote RLOG agent process. 29 | It also creates a bootstrap client process and manages it. 30 | 31 | ![Replicant FSM](replicant-fsm.png) 32 | 33 | Full process of shard replication: 34 | 35 | ![Replication MSC](replication-msc.png) 36 | 37 | #### RLOG replica importer worker 38 | 39 | `rlog_replica_importer_worker` is a helper process spawned by `rlog_replica` specifically to import batches of transactions into the local database. 40 | 41 | This importing is not done in the parent process because it can have a long message queue, which is really harmful for performance of mnesia transactions: 42 | During commit stage, the transaction does a receive without [ref trick](https://blog.stenmans.org/theBeamBook/#_the_synchronous_call_trick_aka_the_ref_trick), so it has to scan the entire mailbox. 43 | The protocol between `rlog_replica_importer_worker` and `rlog_replica` processes has been designed in such a way that the former process never has more than one message in the mailbox, hence mnesia transactions initiated from this process run much faster. 44 | 45 | Note that replica sends transactions to the importer worker in batches. 46 | Replica process maintains an internal queue of transactions from the upstream agent, where the messages are accumulated while the batch is being imported by the importer worker. 47 | Once the batch is fully imported, replica process immediately initiates importing of the next batch. 
48 | 49 | ### RLOG bootstrapper (client/server) 50 | 51 | RLOG bootstrapper is a temporary `gen_server` process that runs on both core and replicant nodes during replica initialization. 52 | RLOG bootstrapper server runs `mnesia:dirty_all_keys` operation on the tables within the shard, and then iterates through the cached keys. 53 | For each table and key pair it performs `mnesia:dirty_read` operation and caches the result. 54 | If the value for the key is missing, such record is ignored. 55 | Records are sent to the remote bootstrapper client process in batches. 56 | Bootstrapper client applies batches to the local table replica using dirty operations. 57 | 58 | ## Bootstrapping 59 | 60 | Upon connecting to the RLOG server, the replica will perform a process called bootstrapping. 61 | It cleans all the tables that belong to the shard, and spawns a bootstrapper client. 62 | 63 | Bootstrapping can be done using dirty operations. 64 | Transaction log has an interesting property: replaying it can heal a partially corrupted replica. 65 | Transaction log replay can fix missing or reordered updates and deletes, as long as the replica has been consistent prior to the first replayed transaction. 66 | This healing property of the TLOG can be used to bootstrap the replica using only dirty operations. (TODO: prove it) 67 | One downside of this approach is that the replica contains subtle inconsistencies during the replay, and cannot be used until the replay process finishes. 68 | It should be mandatory to shutdown business applications while bootstrap and syncing are going on. 
69 | -------------------------------------------------------------------------------- /doc/src/replicant-fsm.uml: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | bootstrap: Receiving all the records\n from the core node 4 | local_replay: Replaying transactions\n that have been buffered locally\n during bootstrap 5 | normal: Remote transactions are applied\ndirectly to the replica 6 | 7 | [*] --> bootstrap 8 | bootstrap --> local_replay : received bootstrap_complete 9 | local_replay --> normal : reached the end of the local rlog 10 | 11 | @enduml 12 | -------------------------------------------------------------------------------- /doc/src/replication-msc.uml: -------------------------------------------------------------------------------- 1 | @startuml 2 | scale 2000 width 3 | 4 | participant "RLOG server" as server #ffc 5 | participant "RLOG agent" as agent #ffc 6 | participant "RLOG bootstrapper" as boot_serv #ffc 7 | 8 | participant "RLOG replica" as repl #ccf 9 | participant "RLOG replica\nimporter worker" as repl_imp #ccf 10 | participant "bootstrap client" as boot_client #ccf 11 | 12 | activate server 13 | activate repl 14 | activate repl_imp 15 | 16 | group Agent initialization 17 | == Probe the connection to minimize risk of double-subscription and negotiate protocol version == 18 | repl -> server : probe 19 | repl <- server : {true, 0} 20 | note over repl: The protocol version\nshould be the same,\nabort connection if different 21 | 22 | == Initiate connection == 23 | repl -> server : {connect, LocalCheckpointTS} 24 | note over server : Spawn a new agent process\nunder the shard supervisor 25 | server -\\ agent : spawn(now() - SafeInterval) 26 | 27 | activate agent 28 | 29 | repl <- server : {need_bootstrap, AgentPID} 30 | end 31 | 32 | == Bootstrapping == 33 | 34 | group Bootstraper initialization 35 | hnote over repl : bootstrap 36 | 37 | repl -\\ boot_client : spawn() 38 | activate boot_client 39 | 
40 | boot_client -> server : {bootstrap, self()} 41 | server -\\ boot_serv : spawn(RemotePid) 42 | activate boot_serv 43 | 44 | boot_serv -> boot_serv : mnesia:dirty_all_keys\nfor each table in shard 45 | 46 | server -> boot_client : {ok, Pid} 47 | end 48 | 49 | group Bootstrap 50 | note over boot_serv : Iterate through the\ncached keys 51 | loop 52 | boot_serv -> boot_client : {batch, [{Tab, Record}]} 53 | boot_client -> boot_client : import batch to the\ntable replica 54 | boot_serv <- boot_client : ok 55 | end 56 | 57 | note over agent : At the same time... 58 | 59 | loop 60 | server -> agent : {trans, SeqNo, TLOG} 61 | agent -> repl : '#entry{}' 62 | repl -> repl : cache batch to the local rlog 63 | end 64 | 65 | boot_serv -> boot_client : bootstrap_complete 66 | deactivate boot_serv 67 | boot_client -> repl : bootstrap_complete 68 | deactivate boot_client 69 | end 70 | 71 | group local_replay 72 | hnote over repl : local_replay 73 | 74 | note over repl : Iterate through the\ncached transactions 75 | 76 | loop 77 | server -> agent : {trans, SeqNo, TLOG} 78 | agent -> repl : '#entry{}' 79 | repl -> repl : cache batch in the local rlog 80 | 81 | repl -> repl : Import ops from the local rlog\nto the local replica 82 | end 83 | 84 | note over repl : Reached the end of\nthe local rlog 85 | end 86 | 87 | 88 | == Normal operation == 89 | 90 | hnote over repl : normal 91 | 92 | loop 93 | server -> agent : {trans, SeqNo, TLOG} 94 | agent -> repl : '#entry{SeqNo}' 95 | server -> agent : {trans, SeqNo + 1, TLOG} 96 | agent -> repl : '#entry{SeqNo + 1}' 97 | server -> agent : {trans, SeqNo + 2, TLOG} 98 | agent -> repl : '#entry{SeqNo + 2}' 99 | repl -> repl_imp : {import_batch, [Entries 1 2 3]} 100 | repl_imp -> repl_imp : Import batch to the\nlocal replica 101 | repl <- repl_imp : ok 102 | end 103 | 104 | @enduml 105 | -------------------------------------------------------------------------------- /include/mria.hrl: 
-------------------------------------------------------------------------------- 1 | -type(cluster() :: atom()). 2 | 3 | -type(member_status() :: joining | up | healing | leaving | down). 4 | 5 | -type(member_address() :: {inet:ip_address(), inet:port_number()}). 6 | 7 | -record(member, { 8 | node :: node(), 9 | addr :: undefined | member_address(), 10 | guid :: undefined | mria_guid:guid(), 11 | hash :: undefined | pos_integer(), 12 | status :: member_status(), 13 | mnesia :: undefined | running | stopped | false, 14 | ltime :: undefined | erlang:timestamp(), 15 | role :: mria_rlog:role() 16 | }). 17 | 18 | -type(member() :: #member{}). 19 | 20 | -define(JOIN_LOCK_ID(REQUESTER), {mria_sync_join, REQUESTER}). 21 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | %% -*- mode:erlang -*- 2 | {minimum_otp_vsn, "21.0"}. 3 | 4 | {deps, 5 | [{snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe", {tag, "1.0.10"}}}, 6 | {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.4.1"}}}, 7 | {replayq, {git, "https://github.com/emqx/replayq", {tag, "0.3.6"}}}, 8 | {mnesia_rocksdb, {git, "https://github.com/emqx/mnesia_rocksdb", {tag, "0.1.17"}}}, 9 | {optvar, {git, "https://github.com/emqx/optvar", {tag, "1.0.5"}}} 10 | ]}. 11 | 12 | {erl_opts, 13 | [warn_unused_vars, 14 | warn_shadow_vars, 15 | warn_unused_import, 16 | warn_obsolete_guard, 17 | warnings_as_errors, 18 | debug_info, 19 | compressed, 20 | {d, 'MRIA_HAS_ROCKSDB', true} 21 | ]}. 22 | 23 | {validate_app_modules, true}. 
24 | 25 | %% Check layer violations (TODO: make it stricter and remove all exemptions): 26 | {xref_queries, 27 | [ {"closure(E) | mria_status : Mod || [mria, mria_lb, mria_schema, mria_membership, mria_node_monitor, mria_rlog] : Mod", 28 | [{{mria_status,get_shard_stats,1}, {mria_lb,core_node_weight,1}}, 29 | {{mria_status,shards_down,0}, {mria_schema,shards,0}}, 30 | {{mria_status,shards_status,0}, {mria_schema,shards,0}}, 31 | {{mria_status,shards_syncing,0}, {mria_schema,shards,0}}, 32 | {{mria_status,shards_up,0}, {mria_schema,shards,0}}]} 33 | , {"closure(E) | mria_schema : Mod || [mria, mria_lb, mria_node_monitor, mria_membership, mria_rlog] : Mod", 34 | []} 35 | , {"closure(E) | mria_mnesia : Mod || [mria, mria_lb, mria_schema, mria_node_monitor, mria_membership, mria_rlog] : Mod", 36 | [{{mria_mnesia,join_cluster,1}, {mria_rlog,role,1}}]} 37 | , {"closure(E) | mria_membership : Mod || [mria, mria_lb, mria_schema, mria_node_monitor, mria_rlog] : Mod", 38 | [{{mria_membership,handle_cast,2}, {mria_rlog,role,1}}, 39 | {{mria_membership,role,1},{mria_rlog,role,1}}]} 40 | ]}. 41 | 42 | {xref_checks, 43 | [undefined_function_calls, undefined_functions, 44 | locals_not_used, deprecated_function_calls, 45 | warnings_as_errors, deprecated_functions 46 | ]}. 47 | 48 | {eunit_opts, 49 | [verbose, 50 | {report,{eunit_surefire,[{dir,"."}]}} 51 | ]}. 52 | 53 | %% {cover_enabled, false}. % Note: it's important to enable it per test-suite only. Otherwise it will mess up the compatibility suite 54 | {cover_opts, [verbose]}. 55 | {cover_export_enabled, true}. 56 | 57 | {dialyzer, 58 | [{warnings, [unknown]}, 59 | {plt_extra_apps, [replayq, mnesia, mnesia_rocksdb, optvar]} 60 | ]}. 
61 | 62 | {profiles, 63 | [{test, 64 | [{plugins, [{coveralls, {git, "https://github.com/emqx/coveralls-erl", {branch, "github"}}}]}, 65 | {deps, [{meck, "0.8.13"}, 66 | {proper, "1.3.0"} 67 | ]}, 68 | {erl_opts, [debug_info]} 69 | ]} 70 | %% , {concuerror, 71 | %% [{overrides, 72 | %% [ {add, snabbkaffe, 73 | %% [{erl_opts, [{d, 'CONCUERROR'}]}]} 74 | %% , {add, mria, 75 | %% [{erl_opts, [{d, 'CONCUERROR'}]}]} 76 | %% ]} 77 | %% ]} 78 | ]}. 79 | 80 | {ct_readable, true}. 81 | -------------------------------------------------------------------------------- /rebar.config.script: -------------------------------------------------------------------------------- 1 | %% -*- mode:erlang -*- 2 | 3 | BuildWithoutRocks = 4 | fun(Config) -> 5 | case os:getenv("BUILD_WITHOUT_ROCKSDB") of 6 | false -> 7 | Config; 8 | _ -> 9 | io:format(user, "Building without rocksDB", []), 10 | Deps = lists:keydelete(mnesia_rocksdb, 1, proplists:get_value(deps, Config)), 11 | ErlOpts = lists:keyreplace('MRIA_HAS_ROCKSDB', 2, proplists:get_value(erl_opts, Config), 12 | {d, 'MRIA_HAS_ROCKSDB', false}), 13 | Config1 = lists:keyreplace(deps, 1, Config, {deps, Deps}), 14 | lists:keyreplace(erl_opts, 1, Config1, {erl_opts, ErlOpts}) 15 | end 16 | end, 17 | 18 | AddCoverallsToken = 19 | fun(Config) -> 20 | case {os:getenv("GITHUB_ACTIONS"), os:getenv("GITHUB_TOKEN")} of 21 | {"true", Token} when is_list(Token) -> 22 | [{coveralls_repo_token, Token}, 23 | {coveralls_service_job_id, os:getenv("GITHUB_RUN_ID")}, 24 | {coveralls_commit_sha, os:getenv("GITHUB_SHA")}, 25 | {coveralls_service_number, os:getenv("GITHUB_RUN_NUMBER")}, 26 | {coveralls_coverdata, "_build/test/cover/*.coverdata"}, 27 | {coveralls_service_name, "github"} | Config]; 28 | _ -> 29 | Config 30 | end 31 | end, 32 | 33 | AddCoverallsPR = 34 | fun(Config) -> 35 | case os:getenv("GITHUB_EVENT_NAME") =:= "pull_request" andalso 36 | string:tokens(os:getenv("GITHUB_REF"), "/") of 37 | [_, "pull", PRNO, _] -> 38 | 
[{coveralls_service_pull_request, PRNO} | Config]; 39 | _ -> 40 | Config 41 | end 42 | end, 43 | 44 | %%%% Create final configuration %%%% 45 | 46 | lists:foldl(fun(Fun, Conf) -> Fun(Conf) end, CONFIG, 47 | [ BuildWithoutRocks 48 | , AddCoverallsToken 49 | , AddCoverallsPR 50 | ]). 51 | -------------------------------------------------------------------------------- /scripts/build-old-rel: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euxo pipefail 3 | 4 | echo "Building "${tag}"" 5 | 6 | [ -d "${tmp_dir}" ] || 7 | git clone "${root_dir}" "${tmp_dir}" 8 | 9 | cd "${tmp_dir}" 10 | git checkout "${tag}" 11 | 12 | # Avoid rebuilding rocksdb: 13 | mkdir -p "_checkouts" 14 | pushd "_checkouts" 15 | [ -L rocksdb ] || 16 | ln -s "${root_dir}/_build/default/lib/rocksdb" rocksdb 17 | popd 18 | 19 | [ ! -z $(find . -name mria.app) ] || 20 | rebar3 as test compile 21 | -------------------------------------------------------------------------------- /src/mria.app.src: -------------------------------------------------------------------------------- 1 | {application, mria, 2 | [{description, "Async replication for Mnesia"}, 3 | {vsn, "git"}, 4 | {mod, {mria_app,[]}}, 5 | {registered, 6 | [mria_sup, 7 | mria_membership, 8 | mria_node_monitor, 9 | mria_rlog_server, 10 | mria_lb, 11 | mria_rlog_sup, 12 | mria_shards_sup 13 | ]}, 14 | %% Note: DON'T add mnesia to the list, or it will mess up start order: 15 | {applications, 16 | [kernel, 17 | stdlib, 18 | inets, 19 | gen_rpc, 20 | replayq, 21 | snabbkaffe, 22 | optvar 23 | ]}, 24 | {modules, []}, 25 | {licenses, ["Apache 2.0"]}, 26 | {maintainers, ["EMQX Team "]}, 27 | {links, [{"Github","https://github.com/emqx/mria"}]} 28 | ]}. 
29 | -------------------------------------------------------------------------------- /src/mria.appup.src: -------------------------------------------------------------------------------- 1 | %% -*- mode: erlang -*- 2 | %% Unless you know what you are doing, DO NOT edit manually!! 3 | {VSN, 4 | [{"0.2.13", 5 | [{load_module,mria_lib,brutal_purge,soft_purge,[]}, 6 | {load_module,mria,brutal_purge,soft_purge,[]}]}, 7 | {"0.2.10", 8 | [{load_module,mria_lib,brutal_purge,soft_purge,[]}, 9 | {load_module,mria,brutal_purge,soft_purge,[]}, 10 | {load_module,mria_bootstrapper,brutal_purge,soft_purge,[]}, 11 | {load_module,mria_app,brutal_purge,soft_purge,[]}]}, 12 | {"0.2.8", 13 | [{load_module,mria_rlog,brutal_purge,soft_purge,[]}, 14 | {load_module,mria_shards_sup,brutal_purge,soft_purge,[]}, 15 | {load_module,mria_schema,brutal_purge,soft_purge,[]}, 16 | {load_module,mria_rlog_server,brutal_purge,soft_purge,[]}, 17 | {load_module,mria_rlog_replica,brutal_purge,soft_purge,[]}, 18 | {load_module,mria_rlog_agent,brutal_purge,soft_purge,[]}, 19 | {load_module,mria_node_monitor,brutal_purge,soft_purge,[]}, 20 | {load_module,mria_mnesia,brutal_purge,soft_purge,[]}, 21 | {load_module,mria_lb,brutal_purge,soft_purge,[]}, 22 | {load_module,mria_bootstrapper,brutal_purge,soft_purge,[]}, 23 | {load_module,mria_autoheal,brutal_purge,soft_purge,[]}, 24 | {load_module,mria_app,brutal_purge,soft_purge,[]}, 25 | {load_module,mria,brutal_purge,soft_purge,[]}, 26 | {load_module,mria_config,brutal_purge,soft_purge,[]}, 27 | {load_module,mria_lib,brutal_purge,soft_purge,[]}]}], 28 | [{"0.2.13", 29 | [{load_module,mria_lib,brutal_purge,soft_purge,[]}, 30 | {load_module,mria,brutal_purge,soft_purge,[]}]}, 31 | {"0.2.10", 32 | [{load_module,mria_lib,brutal_purge,soft_purge,[]}, 33 | {load_module,mria,brutal_purge,soft_purge,[]}, 34 | {load_module,mria_bootstrapper,brutal_purge,soft_purge,[]}, 35 | {load_module,mria_app,brutal_purge,soft_purge,[]}]}, 36 | {"0.2.8", 37 | 
[{load_module,mria_rlog,brutal_purge,soft_purge,[]}, 38 | {load_module,mria_shards_sup,brutal_purge,soft_purge,[]}, 39 | {load_module,mria_schema,brutal_purge,soft_purge,[]}, 40 | {load_module,mria_rlog_server,brutal_purge,soft_purge,[]}, 41 | {load_module,mria_rlog_replica,brutal_purge,soft_purge,[]}, 42 | {load_module,mria_rlog_agent,brutal_purge,soft_purge,[]}, 43 | {load_module,mria_node_monitor,brutal_purge,soft_purge,[]}, 44 | {load_module,mria_mnesia,brutal_purge,soft_purge,[]}, 45 | {load_module,mria_lb,brutal_purge,soft_purge,[]}, 46 | {load_module,mria_bootstrapper,brutal_purge,soft_purge,[]}, 47 | {load_module,mria_autoheal,brutal_purge,soft_purge,[]}, 48 | {load_module,mria_app,brutal_purge,soft_purge,[]}, 49 | {load_module,mria,brutal_purge,soft_purge,[]}, 50 | {load_module,mria_config,brutal_purge,soft_purge,[]}, 51 | {load_module,mria_lib,brutal_purge,soft_purge,[]}]}]}. 52 | -------------------------------------------------------------------------------- /src/mria_app.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2019-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | -module(mria_app). 18 | 19 | -behaviour(application). 
-export([start/2, prep_stop/1, stop/1]).

-include_lib("snabbkaffe/include/trace.hrl").

%%================================================================================
%% API functions
%%================================================================================

%% Application start callback: load the configuration, initialise rlog,
%% bring up mnesia (optionally with operator-forced master nodes) and
%% finally start the top-level supervisor.
start(_Type, _Args) ->
    ?tp(notice, "Starting mria", #{}),
    mria_config:load_config(),
    mria_rlog:init(),
    ?tp(notice, "Starting mnesia", #{}),
    maybe_perform_disaster_recovery(),
    mria_mnesia:ensure_schema(),
    mria_mnesia:ensure_started(),
    ?tp(notice, "Starting shards", #{}),
    mria_sup:start_link().

%% Called before the supervision tree is shut down; the state is
%% returned untouched.
prep_stop(AppState) ->
    ?tp(debug, "Mria is preparing to stop", #{}),
    mria_rlog:cleanup(),
    AppState.

%% Called after the supervision tree is gone.
stop(_State) ->
    mria_config:erase_all_config(),
    ?tp(notice, "Mria is stopped", #{}).

%%================================================================================
%% Internal functions
%%================================================================================

%% Disaster recovery is requested by setting the `MNESIA_MASTER_NODES'
%% OS environment variable; every atom token found in it is treated as
%% a master node name.
maybe_perform_disaster_recovery() ->
    disaster_recovery(os:getenv("MNESIA_MASTER_NODES")).

disaster_recovery(false) ->
    ok;
disaster_recovery(EnvValue) ->
    {ok, Tokens, _} = erl_scan:string(EnvValue),
    perform_disaster_recovery([Node || {atom, _, Node} <- Tokens]).

perform_disaster_recovery(MasterNodes) ->
    logger:critical("Disaster recovery procedures have been enacted. "
                    "Starting mnesia with explicitly set master nodes: ~p", [MasterNodes]),
    mnesia:set_master_nodes(MasterNodes).

--------------------------------------------------------------------------------
/src/mria_autoclean.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2019-2021 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

-module(mria_autoclean).

-include("mria.hrl").

-export([init/0, check/1]).

-record(autoclean, {expiry, timer}).

-type(autoclean() :: #autoclean{}).

-export_type([autoclean/0]).

%% Build the autoclean state and arm the first timer when the
%% `cluster_autoclean' application variable is set; otherwise the
%% feature is off and `undefined' is returned.
init() ->
    case application:get_env(mria, cluster_autoclean) of
        undefined ->
            undefined;
        {ok, Expiry} ->
            timer_backoff(#autoclean{expiry = Expiry})
    end.

%% (Re)arm the timer: the node monitor is asked to deliver `autoclean'
%% after a quarter of the expiry interval.
timer_backoff(State = #autoclean{expiry = Expiry}) ->
    Interval = Expiry div 4,
    State#autoclean{timer = mria_node_monitor:run_after(Interval, autoclean)}.

%% Inspect every member currently reported as `down', force out the
%% expired ones, then re-arm the timer.
check(State = #autoclean{expiry = Expiry}) ->
    lists:foreach(fun(Member) -> maybe_clean(Member, Expiry) end,
                  mria_membership:members(down)),
    timer_backoff(State).

maybe_clean(#member{node = Node, ltime = LTime}, Expiry) ->
    expired(LTime, Expiry) andalso mria:force_leave(Node),
    ok.

%% `timer:now_diff/2' yields microseconds; the result is converted to
%% milliseconds before being compared with the expiry.
expired(LTime, Expiry) ->
    ElapsedMs = timer:now_diff(erlang:timestamp(), LTime) div 1000,
    ElapsedMs > Expiry.
--------------------------------------------------------------------------------
/src/mria_autoheal.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2019-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% Network-partition autoheal state machine. The functions here are
%% pure state transformers over `#autoheal{}'; messages are fed in
%% through `handle_msg/2' and timers/casts go through
%% `mria_node_monitor'.
-module(mria_autoheal).

-export([ init/0
        , enabled/0
        , proc/1
        , handle_msg/2
        ]).

-record(autoheal, {delay, role, proc, timer}).

-type autoheal() :: #autoheal{}.

-export_type([autoheal/0]).

-include_lib("snabbkaffe/include/trace.hrl").

-define(DEFAULT_DELAY, 15000).
-define(LOG(Level, Format, Args),
        logger:Level("Mria(Autoheal): " ++ Format, Args)).

%% Return the initial autoheal state, or `undefined' when the feature
%% is disabled.
init() ->
    case enabled() of
        {true, Delay} ->
            ?tp("Starting autoheal", #{delay => Delay}),
            #autoheal{delay = Delay};
        false ->
            undefined
    end.

%% Read the `cluster_autoheal' application variable (default: `true').
%% `true' selects the default delay, an integer is used as the delay
%% itself, `false' disables autoheal.
enabled() ->
    case application:get_env(mria, cluster_autoheal, true) of
        false -> false;
        true  -> {true, ?DEFAULT_DELAY};
        Delay when is_integer(Delay) ->
            {true, Delay}
    end.

%% Pid of the running healing process, if any.
proc(undefined) -> undefined;
proc(#autoheal{proc = Proc}) ->
    Proc.

%% Handle one autoheal message and return the new state.
handle_msg(Msg, undefined) ->
    ?LOG(error, "Autoheal not enabled! Unexpected msg: ~p", [Msg]), undefined;

%% A healing process is already running: ignore further reports.
handle_msg({report_partition, _Node}, Autoheal = #autoheal{proc = Proc})
  when Proc =/= undefined ->
    Autoheal;

handle_msg({report_partition, Node}, Autoheal = #autoheal{delay = Delay, timer = TRef}) ->
    ?tp(info, mria_autoheal_report_partition, #{node => Node}),
    case mria_membership:leader() =:= node() of
        true ->
            %% Restart the delay so that a burst of reports results in
            %% a single split-view computation.
            ensure_cancel_timer(TRef),
            TRef1 = mria_node_monitor:run_after(Delay, {autoheal, {create_splitview, node()}}),
            Autoheal#autoheal{role = leader, timer = TRef1};
        false ->
            ?LOG(critical, "I am not leader, but received partition report from ~s", [Node]),
            Autoheal
    end;

handle_msg(Msg = {create_splitview, Node}, Autoheal = #autoheal{delay = Delay, timer = TRef})
  when Node =:= node() ->
    ensure_cancel_timer(TRef),
    case is_majority_alive() of
        true ->
            Cliques = lists:sort(fun compare_cliques/2,
                                 mria_lib:find_clusters(collect_split_view())),
            mria_node_monitor:cast(coordinator(Cliques), {heal_partition, Cliques}),
            Autoheal#autoheal{timer = undefined};
        false ->
            %% Too many nodes are unreachable: retry after the delay.
            Autoheal#autoheal{timer = mria_node_monitor:run_after(Delay, {autoheal, Msg})}
    end;

handle_msg(Msg = {create_splitview, _Node}, Autoheal) ->
    ?LOG(critical, "I am not leader, but received : ~p", [Msg]),
    Autoheal;

handle_msg({heal_partition, Cliques}, Autoheal = #autoheal{proc = undefined}) ->
    ?tp(info, mria_autoheal_partition, #{cliques => Cliques}),
    Proc = spawn_link(fun() ->
                              ?LOG(info, "Healing partition: ~p", [Cliques]),
                              heal_partition(Cliques)
                      end),
    Autoheal#autoheal{role = coordinator, proc = Proc};

handle_msg({heal_partition, Cliques}, Autoheal = #autoheal{proc = _Proc}) ->
    ?LOG(critical, "Unexpected heal_partition msg: ~p", [Cliques]),
    Autoheal;

handle_msg({'EXIT', Pid, normal}, Autoheal = #autoheal{proc = Pid}) ->
    Autoheal#autoheal{proc = undefined};
handle_msg({'EXIT', Pid, Reason}, Autoheal = #autoheal{proc = Pid}) ->
    %% The exit reason is an arbitrary term, so it must be printed with
    %% ~p: ~s raises a format error for anything that is not chardata
    %% or an atom.
    ?LOG(critical, "Autoheal process crashed: ~p", [Reason]),
    Autoheal#autoheal{proc = undefined};

handle_msg(Msg, Autoheal) ->
    %% One control sequence per argument; the previous format string
    %% had a single ~p for two arguments.
    ?LOG(critical, "Unexpected msg: ~p, state: ~p", [Msg, Autoheal]),
    Autoheal.

%% Ask every DB node for its view of the running nodes and build a map
%% `Node => Peers'. Nodes whose call did not return `{ok, Peers}' are
%% left out.
collect_split_view() ->
    Nodes = mria_mnesia:db_nodes(),
    RPCResult = erpc:multicall(Nodes, mria_mnesia, running_nodes, []),
    lists:foldl(fun({N, {ok, Peers}}, Acc) ->
                        Acc #{N => Peers};
                   (_Unreachable, Acc) ->
                        %% Ignore unreachable nodes:
                        Acc
                end,
                #{},
                lists:zip(Nodes, RPCResult)).

%% Ordering function for `lists:sort/2': larger cliques first; among
%% equally sized ones the clique containing the local node wins.
compare_cliques(Running1, Running2) ->
    Len1 = length(Running1), Len2 = length(Running2),
    if
        Len1 > Len2  -> true;
        Len1 == Len2 -> lists:member(node(), Running1);
        true         -> false
    end.

-spec coordinator([[node()]]) -> node().
coordinator([Majority | _]) ->
    mria_membership:coordinator(Majority).

-spec heal_partition([[node()]]) -> ok.
heal_partition([_Majority]) ->
    %% There are no partitions: a single clique contains every node.
    ok;
heal_partition([_Majority|Minorities]) ->
    reboot_minority(lists:append(Minorities)).

reboot_minority(Minority) ->
    ?tp(info, "Rebooting minority", #{nodes => Minority}),
    lists:foreach(fun rejoin/1, Minority).

%% Make `Node' re-join the cluster through the local node.
rejoin(Node) ->
    Ret = rpc:call(Node, mria, join, [node(), heal]),
    ?tp(critical, "Rejoin for autoheal",
        #{ node   => Node
         , return => Ret
         }).

ensure_cancel_timer(undefined) ->
    ok;
ensure_cancel_timer(TRef) ->
    catch erlang:cancel_timer(TRef).

%% NOTE(review): because of the integer division this is stricter than
%% a plain majority for odd cluster sizes (e.g. 3 nodes with 1
%% unreachable yields `false'). Left as is; confirm whether that is
%% intended before changing it.
is_majority_alive() ->
    All = mria_mnesia:cluster_nodes(all),
    NotAliveLen = length(All -- [node() | nodes()]),
    NotAliveLen < (length(All) div 2).
168 | -------------------------------------------------------------------------------- /src/mria_bootstrapper.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | %% @doc This module implements both bootstrap server and client 18 | 19 | -module(mria_bootstrapper). 20 | 21 | -behaviour(gen_server). 22 | 23 | %% API: 24 | -export([start_link/2, start_link_client/3]). 25 | 26 | %% gen_server callbacks: 27 | -export([ init/1 28 | , terminate/2 29 | , handle_call/3 30 | , handle_cast/2 31 | , handle_info/2 32 | , code_change/3 33 | ]). 34 | 35 | %% Internal exports: 36 | -export([do_push_batch/2, do_complete/3]). 37 | 38 | -include("mria_rlog.hrl"). 39 | -include_lib("snabbkaffe/include/trace.hrl"). 40 | 41 | -define(end_of_table, '$end_of_table'). 42 | 43 | %%================================================================================ 44 | %% Type declarations 45 | %%================================================================================ 46 | 47 | -define(clear_table, clear_table). 
%% A batch as pushed from the bootstrap server to the client: sender
%% pid, table and either the records to import or the `clear_table'
%% marker.
-type batch() :: { _From    :: pid()
                 , _Table   :: mria:table()
                 , _Records :: [tuple()] | ?clear_table
                 }.

%% Table iteration state kept by the server.
-record(iter,
        { table   :: mria:table()
        , storage :: atom() | {ext, _, _}
        , state   :: _
        }).

%% State of the bootstrap server process.
-record(server,
        { shard      :: mria_rlog:shard()
        , subscriber :: mria_lib:subscriber()
        , iterator   :: #iter{} | undefined
        , tables     :: [mria:table()]
        }).

%% State of the bootstrap client process.
-record(client,
        { shard  :: mria_rlog:shard()
        , server :: pid()
        , parent :: pid()
        }).

%%================================================================================
%% API functions
%%================================================================================

%% @doc Start bootstrapper server
-spec start_link(mria_rlog:shard(), mria_lib:subscriber()) -> {ok, pid()}.
start_link(Shard, Subscriber) ->
    InitArg = {server, Shard, Subscriber},
    gen_server:start_link(?MODULE, InitArg, []).

%% @doc Start bootstrapper client
-spec start_link_client(mria_rlog:shard(), node(), pid()) -> {ok, pid()}.
start_link_client(Shard, RemoteNode, Parent) ->
    InitArg = {client, Shard, RemoteNode, Parent},
    gen_server:start_link(?MODULE, InitArg, []).

%%================================================================================
%% Internal exports (gen_rpc)
%%================================================================================

%% Deliver one batch to the client process `Pid' and wait (without a
%% timeout) until it has been handled.
-spec do_push_batch(pid(), batch()) -> ok.
do_push_batch(Pid, Batch) ->
    Request = {batch, Batch},
    gen_server:call(Pid, Request, infinity).

%% Tell the client that the server is done.
-spec do_complete(pid(), pid(), mria_rlog_server:checkpoint()) -> ok.
do_complete(Client, Server, Snapshot) ->
    Request = {complete, Server, Snapshot},
    gen_server:call(Client, Request, infinity).

%%================================================================================
%% gen_server callbacks
%%================================================================================

%% The same module implements two kinds of processes; the first
%% element of the init argument selects which one is started.
init({server, Shard, Subscriber}) ->
    process_flag(trap_exit, true),
    logger:set_process_metadata(#{ domain => [mria, rlog, bootstrapper, server]
                                 , shard  => Shard
                                 }),
    #{tables := Tables} = mria_config:shard_config(Shard),
    mria_schema:wait_for_tables(Tables),
    ?tp(info, rlog_bootstrapper_start,
        #{ shard     => Shard
         , subscribe => Subscriber
         }),
    %% Kick off the table iteration loop:
    self() ! loop,
    {ok, #server{ shard      = Shard
                , subscriber = Subscriber
                , tables     = Tables
                }};
init({client, Shard, RemoteNode, Parent}) ->
    process_flag(trap_exit, true),
    logger:set_process_metadata(#{ domain => [mria, rlog, bootstrapper, client]
                                 , shard  => Shard
                                 }),
    mria_status:notify_replicant_bootstrap_start(Shard),
    {ok, Pid} = mria_rlog_server:bootstrap_me(RemoteNode, Shard),
    {ok, #client{ parent = Parent
                , shard  = Shard
                , server = Pid
                }}.

handle_info(loop, St = #server{}) ->
    server_loop(St);
handle_info(Info, St) ->
    ?unexpected_event_tp(#{info => Info, state => St}),
    {noreply, St}.

handle_cast(Cast, St) ->
    ?unexpected_event_tp(#{cast => Cast, state => St}),
    {noreply, St}.

%% Client side: both requests are accepted only from the server pid
%% stored in the state (note the repeated `Server' variable in the
%% patterns).
handle_call({complete, Server, Checkpoint}, From, St = #client{server = Server, parent = Parent, shard = Shard}) ->
    ?tp(info, shard_bootstrap_complete, #{}),
    Parent ! #bootstrap_complete{sender = self(), checkpoint = Checkpoint},
    %% Reply explicitly, since the process stops right after:
    gen_server:reply(From, ok),
    mria_status:notify_replicant_bootstrap_complete(Shard),
    {stop, normal, St};
handle_call({batch, {Server, Table, Records}}, _From, St = #client{server = Server, shard = Shard}) ->
    handle_batch(Server, Table, Records),
    mria_status:notify_replicant_bootstrap_import(Shard),
    {reply, ok, St};
handle_call(Call, From, St) ->
    ?unexpected_event_tp(#{call => Call, from => From, state => St}),
    {reply, {error, {unknown_call, Call}}, St}.

code_change(_OldVsn, St, _Extra) ->
    {ok, St}.

terminate(_Reason, St = #server{iterator = I}) ->
    ?terminate_tp,
    %% Release the table fix if the iteration was interrupted:
    I =/= undefined andalso iter_end(I),
    {ok, St};
terminate(_Reason, St = #client{}) ->
    {ok, St}.

%%================================================================================
%% Internal functions
%%================================================================================

%% Send a chunk of records (or the `clear_table' marker) to the
%% subscriber, tagged with the pid of the calling process.
-spec push_records(mria_lib:subscriber(), mria:table(), [tuple()] | ?clear_table) -> ok | {badrpc, _}.
push_records(Subscriber, Table, Records) ->
    push_batch(Subscriber, {self(), Table, Records}).

-spec push_batch(mria_lib:subscriber(), batch()) -> ok | {badrpc, _}.
push_batch({Node, Pid}, Batch = {_, _, _}) ->
    mria_lib:rpc_call_nothrow(Node, ?MODULE, do_push_batch, [Pid, Batch]).

%% Goes through the same non-throwing RPC helper as `push_batch/2', so
%% the spec lists the same `{badrpc, _}' outcome.
-spec complete(mria_lib:subscriber(), pid(), mria_rlog_server:checkpoint()) -> ok | {badrpc, _}.
complete({Node, Pid}, Server, Checkpoint) ->
    mria_lib:rpc_call_nothrow(Node, ?MODULE, do_complete, [Pid, Server, Checkpoint]).
%% Client side: apply one batch received from the server.
handle_batch(_Server, Table, ?clear_table) ->
    mria_schema:ensure_local_table(Table),
    {atomic, ok} = mnesia:clear_table(Table),
    ok;
handle_batch(_Server, Table, Records) ->
    _ = [mnesia:dirty_write(Table, Record) || Record <- Records],
    ok.

%% Server side: one step of the bootstrap loop. Each step either sends
%% one chunk of the current table, moves on to the next table, or
%% finishes the whole bootstrap.
server_loop(St = #server{tables = [], subscriber = Subscriber, iterator = undefined}) ->
    %% All tables and chunks have been sent:
    _ = complete(Subscriber, self(), mria_lib:approx_checkpoint()),
    {stop, normal, St};
server_loop(St0 = #server{tables = [Table|Rest], subscriber = Subscriber, iterator = It0, shard = Shard}) ->
    {It, Records} = next_chunk(It0, Subscriber, Table, Shard),
    St = St0#server{iterator = It},
    case Records of
        ?end_of_table ->
            iter_end(It),
            ?tp(info, complete_shard_table_bootstrap,
                #{ shard => Shard
                 , table => Table
                 }),
            noreply(St#server{tables = Rest, iterator = undefined});
        _ ->
            handle_push_result(push_records(Subscriber, Table, Records), St)
    end.

%% Fetch the next chunk, starting a new iteration if none is in
%% progress.
next_chunk(undefined, Subscriber, Table, Shard) ->
    BatchSize = mria_config:shard_bootstrap_batch_size(Shard),
    ?tp(info, start_shard_table_bootstrap,
        #{ shard => Shard
         , table => Table
         }),
    iter_start(Subscriber, Table, BatchSize);
next_chunk(It = #iter{}, _Subscriber, _Table, _Shard) ->
    iter_next(It).

%% Continue the loop after a successful push; stop when the subscriber
%% could not be reached.
handle_push_result(ok, St) ->
    noreply(St);
handle_push_result({badrpc, Err}, St = #server{subscriber = Subscriber, shard = Shard}) ->
    ?tp(debug, "Failed to push batch",
        #{ subscriber => Subscriber
         , reason     => Err
         , shard      => Shard
         }),
    {stop, normal, St}.

%% Schedule the next loop iteration.
noreply(State) ->
    self() ! loop,
    {noreply, State}.

%% We could, naturally, use mnesia checkpoints here, but they do extra
%% work accumulating all the ongoing transactions, so we avoid it.

-spec iter_start(mria_lib:subscriber(), mria:table(), non_neg_integer()) -> {#iter{}, [tuple()] | ?end_of_table}.
iter_start(Subscriber, Table, BatchSize) ->
    Storage = mnesia:table_info(Table, storage_type),
    %% Push an empty batch to the replica to make sure it created the
    %% local table before we start actual iteration and the receiving
    %% table is empty:
    push_records(Subscriber, Table, ?clear_table),
    %% Start iteration over records:
    mnesia_lib:db_fixtable(Storage, Table, true),
    Iter = #iter{ table   = Table
                , storage = Storage
                },
    chunk_result(Iter, mnesia_lib:db_init_chunk(Storage, Table, BatchSize)).

-spec iter_next(#iter{}) -> {#iter{}, [tuple()] | ?end_of_table}.
iter_next(Iter = #iter{storage = Storage, state = State}) ->
    chunk_result(Iter, mnesia_lib:db_chunk(Storage, State)).

%% Store the continuation in the iterator, or clear it once the table
%% is exhausted.
chunk_result(Iter, {Matches, Cont}) ->
    {Iter#iter{state = Cont}, Matches};
chunk_result(Iter, ?end_of_table) ->
    {Iter#iter{state = undefined}, ?end_of_table}.

%% Undo the `db_fixtable' done by `iter_start/3'.
-spec iter_end(#iter{}) -> ok.
iter_end(#iter{table = Table, storage = Storage}) ->
    mnesia_lib:db_fixtable(Storage, Table, false).

--------------------------------------------------------------------------------
/src/mria_core_shard_sup.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2021-2025 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% Supervision tree for the shard.
%% Runs on core nodes under `mria_shards_sup'
-module(mria_core_shard_sup).

-behaviour(supervisor).

%% API:
-export([ start_link/1
        , start_agent_sup/2
        , start_bootstrapper_sup/2
        , restart_agent_sup/1
        , restart_bootstrapper_sup/1

        , list_agents/0
        ]).

%% supervisor callbacks & external exports:
-export([init/1, start_link/2]).

%%================================================================================
%% API functions
%%================================================================================

%% Start the per-shard supervisor.
start_link(Shard) ->
    supervisor:start_link(?MODULE, [shard, Shard]).

%% Start the agent supervisor as a child of the shard supervisor.
start_agent_sup(SupPid, Shard) ->
    start_sibling(SupPid, agent, Shard).

%% Start the bootstrapper supervisor as a child of the shard supervisor.
start_bootstrapper_sup(SupPid, Shard) ->
    start_sibling(SupPid, bootstrapper, Shard).

%% @doc Restart agent sup without modifying its child spec
restart_agent_sup(SupPid) ->
    restart_sibling(SupPid, agent).

%% @doc Restart bootstrapper sup without modifying its child spec
restart_bootstrapper_sup(SupPid) ->
    restart_sibling(SupPid, bootstrapper).

%% @doc Return the list of agents for all shards
-spec list_agents() -> [{mria_rlog:shard(), [pid()]}].
list_agents() ->
    [{Shard, shard_agents(ShardSup)}
     || {Shard, ShardSup, _, _} <- supervisor:which_children(mria_shards_sup)].

%% Agents running under the `agent' child of one shard supervisor.
shard_agents(ShardSup) ->
    [AgentSupPid] = [P || {agent, P, supervisor, _} <- supervisor:which_children(ShardSup)],
    [Agent || {_, Agent, _, _} <- supervisor:which_children(AgentSupPid)].

%%================================================================================
%% Supervisor callbacks
%%================================================================================

%% The first element of the init argument selects which of the three
%% supervisors implemented by this module is being started.
init([shard, Shard]) ->
    Flags = #{ strategy  => one_for_all
             , intensity => 0
             , period    => 1
             },
    {ok, {Flags, [server(mria_rlog_server, Shard)]}};
init([agent, Shard]) ->
    init_simple_sup(mria_rlog_agent, Shard);
init([bootstrapper, Shard]) ->
    init_simple_sup(mria_bootstrapper, Shard).

%%================================================================================
%% Internal exports
%%================================================================================

start_link(Type, Shard) ->
    supervisor:start_link(?MODULE, [Type, Shard]).

%%================================================================================
%% Internal functions
%%================================================================================

%% Child spec of the shard server; the pid of the calling process is
%% passed as the first start argument.
server(Module, Shard) ->
    Parent = self(),
    #{ id       => Module
     , start    => {Module, start_link, [Parent, Shard]}
     , restart  => permanent
     , shutdown => 1000
     , type     => worker
     }.

%% Common init for the agent and bootstrapper supervisors: temporary
%% `simple_one_for_one' workers started with the shard as the first
%% argument.
init_simple_sup(Module, Shard) ->
    Flags = #{ strategy  => simple_one_for_one
             , intensity => 0
             , period    => 1
             },
    Worker = #{ id      => ignore
              , start   => {Module, start_link, [Shard]}
              , restart => temporary
              , type    => worker
              },
    {ok, {Flags, [Worker]}}.
%% Add the `Id' supervisor as a child of `Sup' and return its pid.
-spec start_sibling(pid(), agent | bootstrapper, mria_rlog:shard()) -> pid().
start_sibling(Sup, Id, Shard) ->
    ChildSpec = simple_sup(Id, Shard),
    {ok, Pid} = supervisor:start_child(Sup, ChildSpec),
    Pid.

%% Terminate child `Id' of `Sup' and start it again from the stored
%% child spec; returns the new pid.
-spec restart_sibling(pid(), agent | bootstrapper) -> pid().
restart_sibling(Sup, Id) ->
    ok = supervisor:terminate_child(Sup, Id),
    {ok, NewPid} = supervisor:restart_child(Sup, Id),
    NewPid.

%% Child spec for one of the nested supervisors implemented by this
%% module.
-spec simple_sup(agent | bootstrapper, mria_rlog:shard()) -> supervisor:child_spec().
simple_sup(Id, Shard) ->
    StartMFA = {?MODULE, start_link, [Id, Shard]},
    #{ id       => Id
     , start    => StartMFA
     , restart  => permanent
     , shutdown => infinity
     , type     => supervisor
     }.

--------------------------------------------------------------------------------
/src/mria_guid.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2019 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% @doc Generate global unique id.
%%
%% --------------------------------------------------------
%% |        Timestamp       |  NodeID + PID  |  Sequence  |
%% |<------- 64bits ------->|<--- 48bits --->|<- 16bits ->|
%% --------------------------------------------------------
%%
%% 1. Timestamp: erlang:system_time if Erlang >= R18, otherwise os:timestamp
%% 2. NodeId:    encode node() to 2 bytes integer
%% 3. Pid:       encode pid to 4 bytes integer
%% 4. Sequence:  2 bytes sequence in one process
%%
%% @end

-module(mria_guid).

-export([ gen/0
        , new/0
        , timestamp/1
        , to_hexstr/1
        , from_hexstr/1
        ]).

-define(TAG_VERSION, 131).
-define(PID_EXT, 103).
-define(NEW_PID_EXT, 88).

-define(MAX_SEQ, 16#FFFF).

-type(guid() :: <<_:128>>).

-export_type([guid/0]).

%% NOTE(review): the binary patterns in this module were lost from the
%% text this file was recovered from (they appeared as empty `<>'). They
%% have been rebuilt from the variable names used below and from the
%% layout documented in the module header; verify against the upstream
%% source.

%% @doc Generate a global unique id.
%% The previous `{Ts, NPid, Seq}' triple is kept in the process
%% dictionary under the `guid' key, so sequence numbers are per process.
-spec(gen() -> guid()).
gen() ->
    Guid = case get(guid) of
               undefined        -> new();
               {_Ts, NPid, Seq} -> next(NPid, Seq)
           end,
    put(guid, Guid), bin(Guid).

%% Fresh `{Timestamp, NodeAndPid, Sequence}' triple with sequence 0.
new() ->
    {ts(), npid(), 0}.

%% Extract the timestamp, i.e. the leading 64 bits of the id.
-spec(timestamp(guid()) -> integer()).
timestamp(<<Ts:64, _/binary>>) ->
    Ts.

%% Advance the sequence number, wrapping around at ?MAX_SEQ.
next(NPid, Seq) when Seq >= ?MAX_SEQ ->
    {ts(), NPid, 0};
next(NPid, Seq) ->
    {ts(), NPid, Seq + 1}.

%% Pack the triple into the 64/48/16-bit layout described above.
bin({Ts, NPid, Seq}) ->
    <<Ts:64, NPid:48, Seq:16>>.

ts() ->
    case erlang:function_exported(erlang, system_time, 1) of
        true -> %% R18
            erlang:system_time(micro_seconds);
        false ->
            {MegaSeconds, Seconds, MicroSeconds} = os:timestamp(),
            (MegaSeconds * 1000000 + Seconds) * 1000000 + MicroSeconds
    end.

%% Copied from https://github.com/okeuday/uuid.git.
%% Fold the SHA-1 of the node name (2 bytes) and the external
%% representation of the calling pid (4 bytes) into one 48-bit integer.
npid() ->
    <<NodeD01, NodeD02, NodeD03, NodeD04, NodeD05,
      NodeD06, NodeD07, NodeD08, NodeD09, NodeD10,
      NodeD11, NodeD12, NodeD13, NodeD14, NodeD15,
      NodeD16, NodeD17, NodeD18, NodeD19, NodeD20>> =
        crypto:hash(sha, erlang:list_to_binary(erlang:atom_to_list(node()))),

    PidBin =
        case erlang:term_to_binary(self()) of
            <<?TAG_VERSION, ?PID_EXT, B/binary>> ->
                binary:part(B, erlang:byte_size(B), -9);
            % format supported in Erlang/OTP 19.0-rc1
            % required for Erlang/OTP 23.0 (and Erlang/OTP 22.0-rc2)
            <<?TAG_VERSION, ?NEW_PID_EXT, B/binary>> ->
                binary:part(B, erlang:byte_size(B), -12)
        end,

    % 72 bits (PID_EXT) or 96 bits (NEW_PID_EXT) for the Erlang pid
    <<PidID1:8, PidID2:8, PidID3:8, PidID4:8, % ID
      PidSR1:8, PidSR2:8, PidSR3:8, PidSR4:8, % Serial
      PidCreation/binary                       % Creation: 1 or 4 bytes
    >> = PidBin,

    PidCR1 = case PidCreation of
                 <<D1>> ->
                     D1;
                 <<D1, D2, D3, D4>> ->
                     D1 bxor D2 bxor D3 bxor D4
             end,

    % reduce the 160 bit NodeData checksum to 16 bits
    NodeByte1 = ((((((((NodeD01 bxor NodeD02)
                       bxor NodeD03)
                      bxor NodeD04)
                     bxor NodeD05)
                    bxor NodeD06)
                   bxor NodeD07)
                  bxor NodeD08)
                 bxor NodeD09)
                bxor NodeD10,
    NodeByte2 = (((((((((NodeD11 bxor NodeD12)
                        bxor NodeD13)
                       bxor NodeD14)
                      bxor NodeD15)
                     bxor NodeD16)
                    bxor NodeD17)
                   bxor NodeD18)
                  bxor NodeD19)
                 bxor NodeD20)
                bxor PidCR1,

    % reduce the Erlang pid to 32 bits
    PidByte1 = PidID1 bxor PidSR4,
    PidByte2 = PidID2 bxor PidSR3,
    PidByte3 = PidID3 bxor PidSR2,
    PidByte4 = PidID4 bxor PidSR1,

    <<NPid:48>> = <<NodeByte1:8, NodeByte2:8,
                    PidByte1:8, PidByte2:8,
                    PidByte3:8, PidByte4:8>>,
    NPid.

%% Hexadecimal (upper case, no leading zeros) rendering of the id.
to_hexstr(<<I:128>>) ->
    list_to_binary(integer_to_list(I, 16)).

%% Inverse of `to_hexstr/1'; the result is padded back to 128 bits.
from_hexstr(S) ->
    I = list_to_integer(binary_to_list(S), 16), <<I:128>>.

--------------------------------------------------------------------------------
/src/mria_membership_sup.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(mria_membership_sup).


-behavior(supervisor).

%% API:
-export([start_link/0]).

%% behavior callbacks:
-export([init/1]).

%% internal exports:
-export([]).

-export_type([]).

%%================================================================================
%% Type declarations
%%================================================================================

%%================================================================================
%% API functions
%%================================================================================

-define(SUP, ?MODULE).

%% Start the supervisor, registered locally under the module name.
-spec start_link() -> supervisor:startlink_ret().
start_link() ->
    supervisor:start_link({local, ?SUP}, ?MODULE, []).


%%================================================================================
%% behavior callbacks
%%================================================================================

%% `rest_for_one': the children are listed in start order, with
%% `mria_membership' first and `mria_node_monitor' second.
init([]) ->
    Flags = #{ strategy      => rest_for_one
             , intensity     => 10
             , period        => 10
             , auto_shutdown => never
             },
    Workers = [worker(Mod) || Mod <- [mria_membership, mria_node_monitor]],
    {ok, {Flags, Workers}}.

%% Child spec of a permanent worker started via `Mod:start_link()'.
-spec worker(module()) -> supervisor:child_spec().
worker(Mod) ->
    StartMFA = {Mod, start_link, []},
    #{ id          => Mod
     , start       => StartMFA
     , shutdown    => 5_000
     , restart     => permanent
     , type        => worker
     , significant => false
     }.

%%================================================================================
%% Internal exports
%%================================================================================

%%================================================================================
%% Internal functions
%%================================================================================

--------------------------------------------------------------------------------
/src/mria_mnesia_null_storage.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2021 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% @doc This module implements a "/dev/null" storage for mnesia: it
%% discards any data that is written into it. This is useful for
%% emitting events in a transactional manner without worrying about
%% cleanup.
%%
%% Mria uses it to "store" transaction logs.
-module(mria_mnesia_null_storage).

-include_lib("snabbkaffe/include/trace.hrl").

-export([register/0, register/1]).

-export([insert/3,
         delete/3,
         add_aliases/1,
         check_definition/4,
         close_table/2,
         create_table/3,
         delete_table/2,
         first/2,
         fixtable/3,
         last/2,
         index_is_consistent/3,
         init_backend/0,
         info/3,
         lookup/3,
         is_index_consistent/2,
         load_table/4,
         match_delete/3,
         next/3,
         prev/3,
         receiver_first_message/4,
         receive_data/5,
         receive_done/4,
         real_suffixes/0,
         remove_aliases/1,
         repair_continuation/2,
         select/1,
         select/3,
         select/4,
         sender_init/4,
         semantics/2,
         slot/3,
         sync_close_table/2,
         tmp_suffixes/0,
         update_counter/4,
         validate_key/6,
         validate_record/6
        ]).

%%================================================================================
%% API
%%================================================================================

%% Register the backend under the default `null_copies' alias.
register() ->
    register(null_copies).

%% Register this module as an mnesia backend type named `Alias'.
%% An already existing backend type counts as success; any other abort
%% reason is returned as `{error, Reason}'.
register(Alias) ->
    registration_result(Alias, mnesia:add_backend_type(Alias, ?MODULE)).

%% Translate the result of `mnesia:add_backend_type/2'.
registration_result(Alias, {atomic, ok}) ->
    {ok, Alias};
registration_result(Alias, {aborted, {backend_type_already_exists, _}}) ->
    {ok, Alias};
registration_result(_Alias, {aborted, Reason}) ->
    {error, Reason}.

%%================================================================================
%% Mnesia storage callbacks
%%================================================================================

%% Writes are discarded; only a trace point is emitted.
insert(_Alias, _Tab, _Val) ->
    ?tp(mria_rlog_insert_val,
        #{ tab   => _Tab
         , value => _Val
         , alias => _Alias
         }),
    ok.

%% Nothing is ever stored, so the callbacks below are all no-ops.
delete(_Alias, _Tab, _Key) -> ok.

add_aliases(_) -> ok.

remove_aliases(_) -> ok.

check_definition(_Alias, _Tab, _Nodes, _Properties) -> ok.

close_table(_Alias, _Tab) -> ok.

%% Table life cycle: nothing to create, load or delete.
create_table(_Alias, _Tab, _Properties) -> ok.

delete_table(_Alias, _Tab) -> ok.

load_table(_Alias, _Tab, _Reason, _CsList) -> ok.

sync_close_table(_Alias, _Tab) -> ok.

fixtable(_Alias, _Tab, _Bool) -> ok.

init_backend() -> ok.

%% Iteration: the table always looks empty.
first(_Alias, _Tab) -> '$end_of_table'.

last(_Alias, _Tab) -> '$end_of_table'.

next(_Alias, _Tab, _Key) -> '$end_of_table'.

prev(_Alias, _Tab, _Key) -> '$end_of_table'.

slot(_Alias, _Tab, _Pos) -> '$end_of_table'.

%% Lookups and selects never find anything.
lookup(_Alias, _Tab, _Key) -> [].

select(_Continuation) -> '$end_of_table'.

select(_Alias, _Tab, _Pattern) -> '$end_of_table'.

select(_Alias, _Tab, _Pattern, _Limit) -> '$end_of_table'.

repair_continuation(Continuation, _MatchSpec) -> Continuation.

match_delete(_Alias, _Tab, _Pattern) -> ok.

%% Indexes: none are supported, hence they are trivially consistent.
index_is_consistent(_Alias, _IxTag, _Bool) -> ok.

is_index_consistent(_Alias, _IxTag) -> true.

%% Table info: size and memory are reported as zero, every other
%% item yields a placeholder atom.
info(_Alias, _Tab, memory) -> 0;
info(_Alias, _Tab, size)   -> 0;
info(_Alias, _Tab, _Other) -> nobody_here_but_us_chicken.

%% No files are kept on disk.
real_suffixes() -> [].

tmp_suffixes() -> [].

%% Backend semantics reported to mnesia.
semantics(_Alias, storage)     -> ram_copies;
semantics(_Alias, types)       -> [set, ordered_set, bag];
semantics(_Alias, index_types) -> [];
semantics(_Alias, _)           -> undefined.

%% Counters cannot be kept in a storage that forgets everything.
update_counter(_Alias, _Tab, _Counter, _Val) ->
    error(not_implemented).

%% Keys and records are accepted as they are.
validate_key(_Alias, _Tab, RecName, Arity, Type, _Key) ->
    {RecName, Arity, Type}.

validate_record(_Alias, _Tab, RecName, Arity, Type, _Obj) ->
    {RecName, Arity, Type}.

%% Table sync protocol: there is never any data to send or to keep.
sender_init(_Alias, _Tab, _LoadReason, _Pid) ->
    {standard, fun() -> '$end_of_table' end, fun eot/1}.

receiver_first_message(_Pid, {first, Size}, _Alias, _Tab) ->
    {Size, []}.

receive_data(_Data, _Alias, _Tab, _Sender, State) ->
    {more, State}.

receive_done(_Alias, _Tab, _Sender, _State) ->
    ok.

%%================================================================================
%% Internal functions
%%================================================================================

%% Continuation used by the sender: always signals the end of the table.
eot(_) ->
    '$end_of_table'.

--------------------------------------------------------------------------------
/src/mria_node.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2019 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

-module(mria_node).

%% Node API
-export([ is_aliving/1
        , is_running/1
        , is_running/3
        ]).

%% @doc Is the node alive? The local node always is; a remote node is
%% alive when it is already connected or answers a ping.
-spec is_aliving(node()) -> boolean().
is_aliving(Node) ->
    Node =:= node()
        orelse lists:member(Node, nodes())
        orelse net_adm:ping(Node) =:= pong.

%% @doc Is the application running? Asks `mria_sup:is_running/0' on
%% the given node.
-spec is_running(node()) -> boolean().
is_running(Node) ->
    is_running(Node, mria_sup, is_running).

%% @doc Is the application running?
%% M:F/0 must return boolean(). A failed RPC counts as "not running".
-spec is_running(node(), module(), atom()) -> boolean().
is_running(Node, M, F) ->
    Answer = rpc:call(Node, M, F, []),
    case Answer of
        {badrpc, _} -> false;
        _           -> Answer
    end.

--------------------------------------------------------------------------------
/src/mria_node_monitor.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2019-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

-module(mria_node_monitor).

-behaviour(gen_server).

-include("mria.hrl").
-include("mria_rlog.hrl").
-include_lib("snabbkaffe/include/trace.hrl").

%% API
-export([start_link/0, stop/0]).

-export([partitions/0]).

%% Internal Exports
-export([cast/2, run_after/2]).

%% gen_server Callbacks
-export([ init/1
        , handle_call/3
        , handle_cast/2
        , handle_info/2
        , terminate/2
        , code_change/3
        ]).

-record(state, {
          partitions :: list(node()),
          heartbeat  :: undefined | reference(),
          autoheal   :: mria_autoheal:autoheal(),
          autoclean  :: mria_autoclean:autoclean()
         }).

-define(SERVER, ?MODULE).

%% @doc Start the node monitor.
-spec(start_link() -> {ok, pid()} | {error, term()}).
start_link() ->
    gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).

%% Stop the locally registered monitor.
stop() ->
    gen_server:stop(?SERVER).

%% @doc Get partitions.
partitions() ->
    gen_server:call(?SERVER, partitions).

%% @private Send `Msg' to the monitor running on `Node'.
cast(Node, Msg) ->
    gen_server:cast({?SERVER, Node}, Msg).

%% @private Deliver `Msg' to the local monitor after `Delay' ms.
run_after(Delay, Msg) ->
    erlang:send_after(Delay, ?SERVER, Msg).

%%--------------------------------------------------------------------
%% gen_server Callbacks
%%--------------------------------------------------------------------

init([]) ->
    process_flag(trap_exit, true),
    logger:update_process_metadata(#{domain => [mria, node_monitor]}),
    rand:seed(exsplus, erlang:timestamp()),
    net_kernel:monitor_nodes(true, [{node_type, visible}, nodedown_reason]),
    {ok, _} = mnesia:subscribe(system),
    %% Nodes that were connected before the subscription are announced
    %% to ourselves as if they had just come up:
    _ = [self() ! {nodeup, Peer, []} || Peer <- nodes() -- [node()]],
    Autoheal = mria_autoheal:init(),
    Autoclean = mria_autoclean:init(),
    InitState = #state{ partitions = []
                      , autoheal   = Autoheal
                      , autoclean  = Autoclean
                      },
    {ok, ensure_heartbeat(InitState)}.

handle_call(partitions, _From, #state{partitions = Partitions} = State) ->
    {reply, Partitions, State};

handle_call(Req, _From, State) ->
    logger:warning("Unexpected call: ~p", [Req]),
    {reply, ignore, State}.

%% Heartbeats from other nodes carry no payload to act upon.
handle_cast({heartbeat, _FromNode}, State) ->
    {noreply, State};

%% Another node asks us to double-check `TargetNode'. The ping runs in
%% a separate process and its outcome is sent back to the asking node.
handle_cast({suspect, FromNode, TargetNode}, State) ->
    ?tp(info, mria_monitor_suspect,
        #{ from_node   => FromNode
         , target_node => TargetNode
         }),
    spawn(fun() ->
                  cast(FromNode, {confirm, TargetNode, ping_status(TargetNode)})
          end),
    {noreply, State};

%% Answer to a `suspect' request of ours: only traced.
handle_cast({confirm, TargetNode, Status}, State) ->
    ?tp(info, mria_node_monitor_confirm,
        #{ target_node => TargetNode
         , status      => Status
         }),
    {noreply, State};

%% Autoheal protocol messages are delegated to the autoheal state.
handle_cast({Tag, _} = Msg, State) when Tag =:= report_partition;
                                        Tag =:= heal_partition ->
    {noreply, autoheal_handle_msg(Msg, State)};

handle_cast(Msg, State) ->
    logger:warning("Unexpected cast: ~p", [Msg]),
    {noreply, State}.

%% Ping the node and map the outcome to `up' or `down'.
ping_status(Node) ->
    case net_adm:ping(Node) of
        pong -> up;
        pang -> down
    end.

handle_info({nodeup, Node, _Info}, State) ->
    mria_membership:node_up(Node),
    {noreply, State};

%% A node went down: tell membership and re-check it in 3 seconds.
handle_info({nodedown, Node, _Info}, State) ->
    mria_membership:node_down(Node),
    run_after(3000, {suspect, Node}),
    {noreply, State};

%% Ask some other running node (if there is one) to ping the suspect.
handle_info({suspect, Node}, State) ->
    Proxies = mria_mnesia:running_nodes() -- [node(), Node],
    case Proxies of
        [] ->
            ignore;
        [ProxyNode | _] ->
            cast(ProxyNode, {suspect, node(), Node})
    end,
    {noreply, State};

handle_info({mnesia_system_event, {mnesia_up, Node}},
            #state{partitions = Partitions} = State) ->
    mria_membership:mnesia_up(Node),
    %% A node that was partitioned away is back:
    lists:member(Node, Partitions)
        andalso mria_membership:partition_healed(Node),
    Remaining = lists:delete(Node, Partitions),
    {noreply, State#state{partitions = Remaining}};

handle_info({mnesia_system_event, {mnesia_down, Node}}, State) ->
    mria_membership:mnesia_down(Node),
    {noreply, State};

handle_info({mnesia_system_event, {inconsistent_database, Context, Node}},
            #state{partitions = Partitions} = State) ->
    ?tp(critical, "Core cluster partition",
        #{ from => Node
         , context => Context
         }),
    mria_membership:partition_occurred(Node),
    %% With autoheal enabled the partition is re-checked in 3 seconds:
    case mria_autoheal:enabled() of
        {true, _} -> run_after(3000, confirm_partition);
        false     -> ignore
    end,
    Known = lists:usort([Node | Partitions]),
    {noreply, State#state{partitions = Known}};

handle_info({mnesia_system_event, {mnesia_overload, Details}}, State) ->
    logger:warning("Mnesia overload: ~p", [Details]),
    {noreply, State};

handle_info({mnesia_system_event, Event}, State) ->
    logger:info("Mnesia system event: ~p", [Event]),
    {noreply, State};

%% Confirm if we should report the partitions
handle_info(confirm_partition, #state{partitions = []} = State) ->
    {noreply, State};

handle_info(confirm_partition, #state{partitions = Partitions} = State) ->
    Leader = mria_membership:leader(),
    case mria_node:is_running(Leader) of
        true ->
            cast(Leader, {report_partition, node()});
        false ->
            logger:critical("Leader is down, cannot autoheal the partitions: ~p", [Partitions])
    end,
    {noreply, State};

handle_info({autoheal, Msg}, State) ->
    {noreply, autoheal_handle_msg(Msg, State)};

%% Send a heartbeat to every other cluster node and re-arm the timer.
handle_info(heartbeat, State) ->
    Peers = [N || N <- mria_mnesia:cluster_nodes(all), N =/= node()],
    lists:foreach(fun(Peer) -> cast(Peer, {heartbeat, node()}) end, Peers),
    {noreply, ensure_heartbeat(State#state{heartbeat = undefined})};

%% Only the exit of the autoheal process is of interest.
handle_info({'EXIT', Pid, _Reason} = Msg, #state{autoheal = Autoheal} = State) ->
    NewState = case mria_autoheal:proc(Autoheal) =:= Pid of
                   true  -> autoheal_handle_msg(Msg, State);
                   false -> State
               end,
    {noreply, NewState};

%% Autoclean Event.
handle_info(autoclean, #state{autoclean = AutoClean} = State) ->
    Checked = mria_autoclean:check(AutoClean),
    {noreply, State#state{autoclean = Checked}};

handle_info(Info, State) ->
    logger:error("Unexpected info: ~p", [Info]),
    {noreply, State}.

terminate(_Reason, _State) ->
    ?terminate_tp,
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%--------------------------------------------------------------------
%% Internal functions
%%--------------------------------------------------------------------

%% Arm the heartbeat timer unless one is already pending. The delay is
%% randomized: 2000 plus `rand:uniform(2000)' milliseconds.
%%
%% TODO: This function triggers a bug in dialyzer, where it forgets about some record fields.
-dialyzer({nowarn_function, [ensure_heartbeat/1]}).
ensure_heartbeat(#state{heartbeat = undefined} = State) ->
    Delay = 2000 + rand:uniform(2000),
    TimerRef = run_after(Delay, heartbeat),
    State#state{heartbeat = TimerRef};

ensure_heartbeat(State) ->
    State.

%% Feed a message to the autoheal state machine and store the state
%% it returns.
autoheal_handle_msg(Msg, #state{autoheal = Autoheal0} = State) ->
    Autoheal = mria_autoheal:handle_msg(Msg, Autoheal0),
    State#state{autoheal = Autoheal}.

--------------------------------------------------------------------------------
/src/mria_rebalance.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2025 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% @doc This module implements a semi-manual procedure for rebalancing
%% replicants among core nodes.
%%
%% Since bootstrapping of replicants can be relatively expensive,
%% rebalance must be triggered manually. But the rest of the procedure
%% is automatic.
%%
%% How to use it:
%%
%% 1. mria_rebalance:start(). -- plan the rebalance. Should be
%% executed on a core node.
%%
%% 2. mria_rebalance:status(). -- get information about the rebalance.
%%
%% 3. mria_rebalance:confirm(). -- start executing the plan.
%%
%% 4. mria_rebalance:abort(). -- abort the rebalance.
-module(mria_rebalance).

%% API:
-export([start/0, abort/0, confirm/0, status/0]).

%% gen_statem callbacks:
-export([init/1, callback_mode/0, handle_event/4]).

%% Internal exports:
-export([list_agents/0, kick/2, collect/0, plan/1]).

-export_type([input/0, plan/0]).

-include_lib("snabbkaffe/include/trace.hrl").
-include("mria_rlog.hrl").

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

%%================================================================================
%% Type declarations
%%================================================================================

-type input() :: #{mria_rlog:shard() =>
                       [{_Core :: node(), _Agents :: [pid()]}]}.

-record(kick, {shard :: mria_rlog:shard(), core :: node(), agents :: [pid()]}).

-type plan() :: [#kick{}].

-ifndef(TEST).
-define(second, 1000).
-else.
-define(second, 100).
-endif.

-define(n, {global, ?MODULE}).

-record(d, {plan = [] :: plan()}).

-define(wait_confirmation, wait_confirmation).
-define(idle_timeout, idle_timeout).
-define(running, running).
-define(exec_timeout, exec_timeout).
-define(complete, complete).

-define(execute, execute).
-define(get_status, get_status).

%%================================================================================
%% API functions
%%================================================================================

%% @doc Stop the globally registered rebalance process. Returns
%% `not_started' when there is no such process.
abort() ->
    try gen_statem:stop(?n)
    catch
        exit:noproc -> not_started
    end.

%% @doc Plan a new rebalance. A rebalance that is already in progress
%% is aborted first.
start() ->
    _ = abort(),
    gen_statem:start(?n, ?MODULE, [], []).

%% @doc Start executing the plan. Like `status/0', returns
%% `not_started' instead of exiting when no rebalance has been started.
confirm() ->
    try gen_statem:call(?n, ?execute)
    catch
        exit:{noproc, _} -> not_started
    end.

%% @doc Get the current state and the remaining plan of the rebalance,
%% or `not_started' when there is no rebalance process.
status() ->
    try gen_statem:call(?n, ?get_status)
    catch
        exit:{noproc, _} -> not_started
    end.

%%================================================================================
%% Behaviour callbacks
%%================================================================================

callback_mode() -> [handle_event_function, state_enter].

%% Collect the agents, derive the plan and wait for the confirmation,
%% unless there is nothing to do.
init(_) ->
    Plan = plan(collect()),
    D = #d{plan = Plan},
    case Plan of
        [] ->
            {ok, ?complete, D};
        _ ->
            {ok, ?wait_confirmation, D}
    end.

%% Wait confirmation state:
handle_event(enter, _, ?wait_confirmation, _D) ->
    %% Shut down automatically if plan is not confirmed in 60 seconds:
    Timeout = 60 * ?second,
    {keep_state_and_data, [{state_timeout, Timeout, ?idle_timeout}]};
handle_event({call, From}, ?execute, ?wait_confirmation, D) ->
    Reply = {reply, From, ok},
    {next_state, ?running, D, [Reply]};
%% Running state:
handle_event(enter, _, ?running, _D) ->
    %% Kick the first agent immediately:
    {keep_state_and_data, [{state_timeout, 0, ?exec_timeout}]};
handle_event(state_timeout, ?exec_timeout, ?running, D = #d{plan = P0}) ->
    case pop_task(P0) of
        {{Shard, Core, Agent}, P} ->
            erpc:call(Core, ?MODULE, kick, [Shard, Agent]),
            %% Pause between two kicks.
            %% TODO: Make it configurable?
            Timeout = 5 * ?second,
            {keep_state, D#d{plan = P}, [{state_timeout, Timeout, ?exec_timeout}]};
        undefined ->
            {next_state, ?complete, D#d{plan = []}}
    end;
%% Complete state:
handle_event(enter, _, ?complete, _D) ->
    %% Linger for a while so the final status can still be queried:
    Timeout = 60 * ?second,
    {keep_state_and_data, [{state_timeout, Timeout, ?idle_timeout}]};
%% Common:
handle_event({call, From}, ?get_status, State, D) ->
    Reply = {reply, From, {State, D#d.plan}},
    {keep_state_and_data, [Reply]};
handle_event(state_timeout, ?idle_timeout, _, _D) ->
    {stop, normal};
handle_event({call, From} = EventType, Event, State, Data) ->
    %% A call that is not valid in the current state (e.g. `?execute'
    %% outside of `?wait_confirmation') must still be answered:
    %% `gen_statem:call/2' has no timeout, so the caller would
    %% otherwise stay blocked.
    ?unexpected_event_tp(#{ event_type => EventType
                          , event => Event
                          , state => State
                          , data => Data
                          }),
    {keep_state_and_data, [{reply, From, {error, {unexpected_call, Event, State}}}]};
handle_event(EventType, Event, State, Data) ->
    ?unexpected_event_tp(#{ event_type => EventType
                          , event => Event
                          , state => State
                          , data => Data
                          }),
    keep_state_and_data.

%%================================================================================
%% Internal exports
%%================================================================================

%% @doc Given the current status of the core cluster, derive the
%% rebalance plan:
-spec plan(input()) -> plan().
plan(Status) ->
    L = maps:fold(
          fun(Shard, Input, Acc) ->
                  plan(Shard, Input) ++ Acc
          end,
          [],
          Status),
    %% Prioritize the most unbalanced nodes/shards: the plan is
    %% consumed from the head, so the entries with the largest number
    %% of excess agents must come first.
    lists:sort(fun(A, B) ->
                       length(A#kick.agents) >= length(B#kick.agents)
               end,
               L).

%% @doc Collect information about agents from the core nodes. Export
%% for debugging/testing.
-spec collect() -> input().
%% Must run on a core node. Core nodes whose RPC did not return
%% `{ok, _}' are left out of the result.
collect() ->
    core = mria_rlog:role(),
    Cores = mria_mnesia:db_nodes(),
    Return = erpc:multicall(Cores, ?MODULE, list_agents, []),
    L = [{Shard, Node, Agents} ||
            {Node, {ok, PerShard}} <- lists:zip(Cores, Return),
            {Shard, Agents} <- PerShard],
    maps:groups_from_list(
      fun({Shard, _, _}) ->
              Shard
      end,
      fun({_, Node, Agents}) ->
              {Node, Agents}
      end,
      L).

%% RPC target: kick the replicant from the given core node by stopping
%% the agent process. Replicant will automatically reconnect to the
%% core node that is currently the least loaded, hence approaching the
%% balance.
-spec kick(mria_rlog:shard(), pid()) -> ok.
kick(Shard, AgentPid) ->
    ?tp(notice, "Kicking agent due to rebalance", #{agent => AgentPid, shard => Shard}),
    mria_rlog_agent:stop(AgentPid).

%% RPC target:
list_agents() ->
    mria_core_shard_sup:list_agents().

%%================================================================================
%% Internal functions
%%================================================================================

%% Take the next agent to kick from the plan. Returns the task
%% together with the remaining plan, or `undefined' when no agents
%% are left.
pop_task([]) ->
    undefined;
pop_task([#kick{agents = []} | Rest]) ->
    pop_task(Rest);
pop_task([K = #kick{shard = Shard, core = Core, agents = [A | AL]} | Rest]) ->
    {{Shard, Core, A}, [K#kick{agents = AL}  | Rest]}.

%% Plan for a single shard: every core node that hosts more agents
%% than the (rounded up) average gives up its excess agents.
-spec plan(mria_rlog:shard(), [{node(), [pid()]}]) -> [#kick{}].
plan(_Shard, []) ->
    %% No core nodes reported for the shard: nothing to balance, and
    %% the average below would be a division by zero.
    [];
plan(Shard, L) ->
    NAgents = lists:sum([length(Agents) || {_Node, Agents} <- L]),
    Avg = ceil(NAgents / length(L)),
    lists:filtermap(
      fun({Node, Agents}) when length(Agents) > Avg ->
              %% Keep the first `Avg' agents, kick the rest:
              Excess = lists:nthtail(Avg, Agents),
              {true, #kick{shard = Shard, core = Node, agents = Excess}};
         (_) ->
              false
      end,
      L).

%%================================================================================
%% Tests
%%================================================================================

-ifdef(TEST).

%% Nothing to plan without any shards:
plan0_test() ->
    ?assertEqual([], plan(#{})).

%% No rebalance is needed when there is only one core node:
plan_single_node_test() ->
    Input = #{ foo => [{n1, [1, 2, 3]}]
             , bar => [{n1, [1, 2, 3]}]
             },
    ?assertEqual([], plan(Input)).

%% No further rebalance is needed:
plan_balanced1_test() ->
    Input = #{foo => [{n1, [1, 2]}, {n2, [3]}, {n3, [4]}]},
    ?assertEqual([], plan(Input)).

plan_balanced2_test() ->
    Input = #{foo => [{n1, [1, 2]}, {n2, [3, 4]}, {n3, [5]}]},
    ?assertEqual([], plan(Input)).

plan_balanced3_test() ->
    Input = #{foo => [{n1, [1, 2]}, {n2, [3]}, {n3, [4]}, {n4, [5, 6]}]},
    ?assertEqual([], plan(Input)).

%% Rebalance is needed:
plan_unbalanced1_test() ->
    Input = #{foo => [{n1, [1, 2]}, {n2, []}]},
    ?assertEqual(
       [#kick{shard = foo, core = n1, agents = [2]}],
       plan(Input)).

plan_unbalanced2_test() ->
    Input = #{foo => [{n1, [1, 2, 3]}, {n2, [4]}, {n3, []}]},
    ?assertEqual(
       [#kick{shard = foo, core = n1, agents = [3]}],
       plan(Input)).

plan_unbalanced3_test() ->
    Input = #{foo => [{n1, [1, 2, 3]}, {n2, [4]}, {n3, []}, {n4, []}]},
    ?assertEqual(
       [#kick{shard = foo, core = n1, agents = [2, 3]}],
       plan(Input)).

plan_unbalanced4_test() ->
    Input = #{foo => [{n1, [1, 2, 3]}, {n2, [4, 5, 6]}, {n3, []}]},
    ?assertEqual(
       [ #kick{shard = foo, core = n1, agents = [3]}
       , #kick{shard = foo, core = n2, agents = [6]}
       ],
       plan(Input)).

-endif.
% TEST 325 | -------------------------------------------------------------------------------- /src/mria_replica_importer_worker.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | %% @doc This process runs on the replicant nodes and it imports 18 | %% transactions to the local replica. 19 | %% 20 | %% The reason it's done in a separate process is because 21 | %% `mria_rlog_replica' process can potentially have a long message 22 | %% queue, and that kills performance of mnesia transaction, which 23 | %% needs to scan the message queue. 24 | -module(mria_replica_importer_worker). 25 | 26 | -behavior(gen_server). 27 | 28 | %% API: 29 | -export([ set_initial_seqno/2 30 | , import_batch/3 31 | , start_link/2 32 | , name/1 33 | ]). 34 | 35 | %% gen_server callbacks 36 | -export([ init/1 37 | , handle_call/3 38 | , handle_cast/2 39 | , handle_info/2 40 | , terminate/2 41 | ]). 42 | 43 | -include_lib("snabbkaffe/include/trace.hrl"). 44 | -include("mria_rlog.hrl"). 45 | 46 | -record(s, 47 | { shard :: mria_rlog:shard() 48 | , seqno :: non_neg_integer() | undefined 49 | }). 

%%================================================================================
%% API functions
%%================================================================================

%% Start an importer worker for the shard. `SeqNo' is the initial value
%% of the counter that the worker advances with every imported batch.
-spec start_link(mria_rlog:shard(), integer()) -> {ok, pid()}.
start_link(Shard, SeqNo) ->
    gen_server:start_link(?MODULE, [Shard, SeqNo], []).

%% Ask the worker to import a batch asynchronously. The returned alias
%% is passed along with the request so that the worker can report back
%% to the caller.
-spec import_batch(transaction | dirty, pid(), [mria_rlog:tx()]) -> reference().
import_batch(ImportType, Server, Tx) ->
    Alias = alias([reply]),
    gen_server:cast(Server, {import_batch, ImportType, Alias, Tx}),
    Alias.

%% Overwrite the sequence number kept by the worker.
-spec set_initial_seqno(pid(), non_neg_integer()) -> ok.
set_initial_seqno(Server, SeqNo) ->
    gen_server:call(Server, {set_initial_seqno, SeqNo}).

%% Name under which the worker of the shard registers itself.
-spec name(mria_rlog:shard()) -> atom().
name(Shard) ->
    list_to_atom(atom_to_list(Shard) ++ "_importer_worker").

%%================================================================================
%% gen_server callbacks
%%================================================================================

init([Shard, SeqNo]) ->
    process_flag(trap_exit, true),
    logger:set_process_metadata(#{ domain => [mria, rlog, replica, importer]
                                 , shard  => Shard
                                 }),
    ?tp(mria_replica_importer_worker_start, #{shard => Shard, seqno => SeqNo}),
    State = #s{shard = Shard, seqno = SeqNo},
    register(name(Shard), self()),
    {ok, State}.

%% Request sent by `set_initial_seqno/2'; without this clause that API
%% function could only ever return `{error, unknown_call}'.
handle_call({set_initial_seqno, SeqNo}, _From, St) ->
    {reply, ok, St#s{seqno = SeqNo}};
handle_call(Call, From, St) ->
    ?unexpected_event_tp(#{call => Call, from => From, state => St}),
    {reply, {error, unknown_call}, St}.

handle_info(Info, St) ->
    ?unexpected_event_tp(#{info => Info, state => St}),
    {noreply, St}.

%% Import one batch, advance the sequence number by the number of
%% entries in the batch, publish it via `mria_status' and report the
%% completion to the alias that came with the request.
handle_cast({import_batch, ImportType, Alias, Batch},
            #s{shard = Shard, seqno = PrevSeqNo} = State) ->
    ?tp(importer_worker_import_batch, #{shard => Shard, reply_to => Alias}),
    ok = case ImportType of
             dirty       -> import_batch_dirty(Batch);
             transaction -> import_batch(Batch)
         end,
    NextSeqNo = PrevSeqNo + length(Batch),
    mria_status:notify_replicant_import_trans(Shard, NextSeqNo),
    Alias ! #imported{ref = Alias},
    {noreply, State#s{seqno = NextSeqNo}};
handle_cast(Cast, State) ->
    ?unexpected_event_tp(#{cast => Cast, state => State}),
    {noreply, State}.

terminate(_Reason, #s{shard = _Shard, seqno = _SeqNo}) ->
    ?terminate_tp,
    ?tp(mria_replica_importer_worker_stop,
        #{ shard  => _Shard
         , seqno  => _SeqNo
         , reason => _Reason
         }).

%%================================================================================
%% Transaction import
%%================================================================================

%% Import the whole batch with dirty operations, inside a single
%% `mnesia:async_dirty/2' activity.
-spec import_batch_dirty([mria_rlog:tx()]) -> ok.
import_batch_dirty(Batch) ->
    mnesia:async_dirty(fun do_import_batch_dirty/1, [Batch]).

%% Apply the operations of every entry of the batch, then handle the
%% sync requests that were collected while doing so.
-spec do_import_batch_dirty([mria_rlog:tx()]) -> ok.
do_import_batch_dirty(Batch) ->
    ImportTx =
        fun({_TID, Ops}) ->
                ?tp(rlog_import_dirty,
                    #{ tid => _TID
                     , ops => Ops
                     }),
                Awaiting = lists:foldr(fun import_op_dirty/2, [], Ops),
                maybe_reply_awaiting_dirty(Awaiting)
        end,
    lists:foreach(ImportTx, Batch).

-spec import_batch([mria_rlog:tx()]) -> ok.
%% Import a batch that may interleave dirty operations and transactions.
%% The batch is consumed in runs: the head's TID kind selects the mnesia
%% activity, `do_import_batch/2' consumes as much of the list as it can
%% within that activity and hands back the remainder.
import_batch([]) ->
    ok;
import_batch(L = [{TID, _Ops}|_]) when ?IS_DIRTY(TID) ->
    Rest = mnesia:async_dirty(fun do_import_batch/2, [dirty, L]),
    import_batch(Rest);
import_batch(L = [{TID, _Ops}|_]) when ?IS_TRANS(TID) ->
    {atomic, Res} = mnesia:transaction(fun do_import_batch/2, [transaction, L]),
    Rest1 = case Res of
                {#?rlog_sync{reply_to = Alias}, Rest} ->
                    %% The mnesia transaction has committed at this point,
                    %% so the awaiting caller can be released.
                    Alias ! {done, Alias},
                    Rest;
                _ -> Res
            end,
    import_batch(Rest1).

%% Runs inside the mnesia activity selected by the first argument.
%% Returns the not-yet-imported tail of the batch. In `transaction' mode it
%% may instead return `{ReplyTo, Rest}' when a synchronous transaction
%% awaited by a local process was imported; the spec previously omitted
%% this shape although `import_batch/1' matches on it.
-spec do_import_batch(dirty | transaction, [mria_rlog:tx()]) ->
          [mria_rlog:tx()] | {mria_rlog:sync_reply_to(), [mria_rlog:tx()]}.
do_import_batch(dirty, [{TID, Ops} | Rest]) when ?IS_DIRTY(TID) ->
    ?tp(rlog_import_dirty,
        #{ tid => TID
         , ops => Ops
         }),
    Waiting = lists:foldr(fun import_op_dirty/2, [], Ops),
    maybe_reply_awaiting_dirty(Waiting),
    do_import_batch(dirty, Rest);
do_import_batch(transaction, [{TID, Ops} | Rest]) when ?IS_TRANS(TID) ->
    ?tp(rlog_import_trans,
        #{ tid => TID
         , ops => Ops
         }),
    Waiting = lists:foldr(fun import_op/2, [], Ops),
    %% Whenever we encounter synchronous transaction initiated by this node,
    %% we stop the iteration, so that an initial caller waiting for a reply
    %% is notified ASAP without waiting for the whole batch to be committed.
    case Waiting of
        [] -> do_import_batch(transaction, Rest);
        [ReplyTo] -> {ReplyTo, Rest};
        [ReplyTo | _] = L ->
            %% One transaction has (and must be awaited by) only one caller.
            %% More than one may happen if someone additionally calls
            %% mnesia:write(#?rlog_sync{} = ReplyTo) inside a transaction.
            ?unexpected_event_tp(#{sync_trans_reply_to => L}),
            {ReplyTo, Rest}
    end;
do_import_batch(_, L) ->
    %% Empty list, or the head's kind does not match the current activity:
    %% return the remainder to `import_batch/1'.
    L.

-spec import_op(mria_rlog:op(), list()) -> list().
%% Apply one rlog operation using transactional mnesia calls.
%% `Acc' collects the `#?rlog_sync{}' markers of locally awaited synchronous
%% transactions; all other operations leave it unchanged.
import_op(Op, Acc) ->
    case Op of
        {write, ?rlog_sync, ReplyTo} ->
            maybe_add_reply(ReplyTo, Acc);
        {write, Tab, Rec} ->
            mnesia:write(Tab, Rec, write),
            Acc;
        {delete, Tab, Key} ->
            mnesia:delete({Tab, Key}),
            Acc;
        {delete_object, Tab, Rec} ->
            mnesia:delete_object(Tab, Rec, write),
            Acc;
        {clear_table, Tab} ->
            mria_mnesia:clear_table_int(Tab),
            Acc;
        {clear_table, Tab, Pattern} ->
            mria_mnesia:clear_table_int(Tab, Pattern),
            Acc
    end.

%% Dirty counterpart of `import_op/2'. Like `import_op/2' it is used as a
%% fold function and returns the accumulator, hence the `list()' return type
%% (the spec used to claim `ok').
-spec import_op_dirty(mria_rlog:op(), list()) -> list().
import_op_dirty(Op, Acc) ->
    case Op of
        {write, ?rlog_sync, ReplyTo} ->
            maybe_add_reply(ReplyTo, Acc);
        {write, Tab, Rec} ->
            mnesia:dirty_write(Tab, Rec),
            Acc;
        {delete, Tab, Key} ->
            mnesia:dirty_delete({Tab, Key}),
            Acc;
        {delete_object, Tab, Rec} ->
            mnesia:dirty_delete_object(Tab, Rec),
            Acc;
        {update_counter, Tab, Key, Incr} ->
            mnesia:dirty_update_counter(Tab, Key, Incr),
            Acc;
        {clear_table, Tab} ->
            mnesia:clear_table(Tab),
            Acc;
        {clear_table, Tab, Pattern} ->
            %% If this op is received, we assume that this node also has
            %% `mnesia:match_delete/2'.
            %% As mria protocol has been bumped, during rolling updates
            %% new replicants must connect only to new cores,
            %% so that both should have this new function.
            mnesia:match_delete(Tab, Pattern),
            Acc
    end.

%% Remember the sync marker only when the awaiting alias belongs to this
%% node; markers created on other nodes are ignored.
maybe_add_reply(#?rlog_sync{reply_to = Alias} = ReplyTo, Acc)
  when node(Alias) =:= node() ->
    ?tp(importer_worker_sync_trans_recv, #{reply_to => Alias}),
    [ReplyTo | Acc];
maybe_add_reply(_ReplyTo, Acc) ->
    Acc.
%% Notify the process awaiting a synchronous transaction that was imported
%% with dirty operations. Takes the list of sync markers collected while
%% importing one transaction; only the first marker is answered, and more
%% than one marker is reported as an unexpected event.
maybe_reply_awaiting_dirty([]) ->
    ok;
maybe_reply_awaiting_dirty([#?rlog_sync{reply_to = Alias} | T] = L) ->
    %% We can reply right here inside a dirty activity context,
    %% at this point, all operations of a given transaction have
    %% been applied, so it's safe to reply to an awaiting process (if any).
    T =/= [] andalso ?unexpected_event_tp(#{sync_trans_reply_to => L}),
    Alias ! {done, Alias},
    ok.

--------------------------------------------------------------------------------
/src/mria_replicant_shard_sup.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% Supervision tree for the shard.
%% Runs on replicant nodes under `mria_shards_sup'
-module(mria_replicant_shard_sup).

-behaviour(supervisor).

%% API:
-export([ start_link/1
        , start_importer_worker/3
        , stop_importer_worker/1
        , start_bootstrap_client/4
        ]).

%% Supervisor callbacks:
-export([init/1]).

%% Shutdown timeout (in milliseconds) used in the child specs of this module.
-define(shutdown, 5000).
%%================================================================================
%% API functions
%%================================================================================

%% @doc Start the (unregistered) supervisor of a replicant shard.
-spec start_link(mria_rlog:shard()) -> {ok, pid()}.
start_link(Shard) ->
    supervisor:start_link(?MODULE, Shard).

%% @doc (Re)start the importer worker under the shard supervisor `SupPid'
%% and return its pid. A previously started worker is stopped first.
-spec start_importer_worker(pid(), mria_rlog:shard(), integer()) -> pid().
start_importer_worker(SupPid, Shard, SeqNo) ->
    ChildId = importer_worker,
    StartMFA = {mria_replica_importer_worker, start_link, [Shard, SeqNo]},
    ChildSpec = #{ id          => ChildId
                 , start       => StartMFA
                 , type        => worker
                 , restart     => permanent
                 , significant => false
                 , shutdown    => ?shutdown
                 },
    start_worker(SupPid, ChildId, ChildSpec).

%% @doc Stop the importer worker and remove its child spec.
-spec stop_importer_worker(pid()) -> ok.
stop_importer_worker(SupPid) ->
    stop_worker(SupPid, importer_worker).

%% @doc (Re)start the bootstrap client under the shard supervisor `SupPid'
%% and return its pid.
-spec start_bootstrap_client(pid(), mria_rlog:shard(), node(), pid()) -> pid().
start_bootstrap_client(SupPid, Shard, RemoteNode, ReplicaPid) ->
    ChildId = bootstrap_client,
    StartMFA = {mria_bootstrapper, start_link_client, [Shard, RemoteNode, ReplicaPid]},
    ChildSpec = #{ id       => ChildId
                 , start    => StartMFA
                 , type     => worker
                 , restart  => transient
                 , shutdown => ?shutdown
                 },
    start_worker(SupPid, ChildId, ChildSpec).

%%================================================================================
%% Supervisor callbacks
%%================================================================================

%% The only static child is the replica process, which receives the pid of
%% this supervisor. It is marked `significant', and the supervisor is
%% configured with `auto_shutdown => any_significant'.
init(Shard) ->
    Replica = #{ id          => replica
               , start       => {mria_rlog_replica, start_link, [self(), Shard]}
               , type        => worker
               , restart     => transient
               , significant => true
               , shutdown    => ?shutdown
               },
    Flags = #{ strategy      => one_for_all
             , intensity     => 0
             , period        => 1
             , auto_shutdown => any_significant
             },
    {ok, {Flags, [Replica]}}.
%%================================================================================
%% Internal functions
%%================================================================================

%% Start the child described by `Spec' under `SupPid' and return its pid.
%% Any existing child with the same `Id' is terminated and deleted first, so
%% the call also works as a restart. Crashes (badmatch) if the child fails
%% to start.
start_worker(SupPid, Id, Spec) ->
    stop_worker(SupPid, Id),
    {ok, Pid} = supervisor:start_child(SupPid, Spec),
    Pid.

%% Terminate the child `Id' and remove its spec from the supervisor.
%% Errors from either step are deliberately ignored, so the call succeeds
%% when the child does not exist.
stop_worker(SupPid, Id) ->
    _ = supervisor:terminate_child(SupPid, Id),
    _ = supervisor:delete_child(SupPid, Id),
    ok.

--------------------------------------------------------------------------------
/src/mria_rlog.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2021-2025 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% API and management functions for asynchronous Mnesia replication
-module(mria_rlog).

-compile({inline, [do_detect_shard/1]}).

-export([ status/0
        , get_protocol_version/0

        , role/0
        , role/1
        , backend/0

        , core_nodes/0
        , subscribe/4
        , wait_for_shards/2
        , init/0
        , cleanup/0

        , intercept_trans/2
        , ensure_shard/1
        ]).
-export_type([ shard/0
             , role/0
             , shard_config/0
             , change_type/0
             , op/0
             , tx/0
             , seqno/0
             , entry/0
             , transport/0
             , sync_reply_to/0
             ]).

-include("mria_rlog.hrl").
-include_lib("mnesia/src/mnesia.hrl").
-include_lib("stdlib/include/ms_transform.hrl").
-include_lib("snabbkaffe/include/trace.hrl").

%%================================================================================
%% Type declarations
%%================================================================================

-type shard() :: atom().

-type role() :: core | replicant.

-type shard_config() :: #{ tables := [mria:table()]
                         }.
-type change_type() :: write | delete | delete_object | clear_table.

%% A single replicated operation.
%% `update_counter' and the 3-tuple `clear_table' (added in protocol
%% version 2) are both handled by the replica importer, so they belong to
%% this type as well.
-type op() :: {write, mria:table(), mria_mnesia:record()}
            | {delete, mria:table(), _Key}
            | {delete_object, mria:table(), mria_mnesia:record()}
            | {update_counter, mria:table(), term(), integer()}
            | {clear_table, mria:table()}
            | {clear_table, mria:table(), term()}.

%% A replicated transaction (or dirty activity): its id and operations.
-type tx() :: {mria_mnesia:tid(), [op()]}.

-type entry() :: #entry{}.

%% Note: seqno is specific for the core node, not for the entire
%% cluster!
-type seqno() :: non_neg_integer().

-type transport() :: ?TRANSPORT_GEN_RPC | ?TRANSPORT_ERL_DISTR.

-type sync_reply_to() :: #?rlog_sync{reply_to :: reference(), shard :: shard()}.
%%================================================================================
%% API
%%================================================================================

%% Summarize the replication state of the local node. The map always has
%% `backend' and `role'; replicants add per-shard information, cores add the
%% rebalance plan.
status() ->
    Backend = backend(),
    Role = role(),
    Base = #{backend => Backend, role => Role},
    case {Backend, Role} of
        {mnesia, _} ->
            Base;
        {rlog, replicant} ->
            PerShard = [{Shard, mria_status:get_shard_stats(Shard)}
                        || Shard <- mria_schema:shards()],
            Base#{ shards_in_sync => mria_status:shards_up()
                 , shards_down    => mria_status:shards_down()
                 , shard_stats    => maps:from_list(PerShard)
                 };
        {rlog, core} ->
            Plan = mria_rebalance:plan(mria_rebalance:collect()),
            Base#{imbalance => Plan}
    end.

%% Role of the local node.
-spec role() -> mria_rlog:role().
role() ->
    mria_config:role().

%% Role of a remote node, obtained via RPC. Any reply other than a valid
%% role (e.g. an RPC failure) crashes the caller.
-spec role(node()) -> mria_rlog:role().
role(Node) ->
    %% TODO: replace with the throwing version, once we stop supporting EMQX releases < 5.0.17
    case mria_lib:rpc_call_nothrow(Node, ?MODULE, role, []) of
        replicant -> replicant;
        core      -> core
    end.

backend() ->
    mria_config:backend().

%% @doc Should be only called in a replicant node.  Returns the list
%% of core nodes cached in `mria_lb'.
-spec core_nodes() -> [node()].
core_nodes() ->
    mria_lb:core_nodes().

%% With the `mnesia' backend there is nothing to wait for. With `rlog' every
%% shard is started if needed, then awaited.
-spec wait_for_shards([shard()], timeout()) -> ok | {timeout, [shard()]}.
wait_for_shards(Shards, Timeout) ->
    case mria_config:backend() of
        mnesia ->
            ok;
        rlog ->
            lists:foreach(fun(Shard) -> ensure_shard(Shard) end, Shards),
            %% Note: core node also must wait for shards, to make sure
            %% the schema has converged, and the shard config is set:
            mria_status:wait_for_shards(Shards, Timeout)
    end.

-spec ensure_shard(shard()) -> ok.
%% Make sure the shard is started. A process registered under the shard's
%% name is taken as evidence that it is already running; "already present /
%% already started" replies from the supervisor are treated as success.
ensure_shard(?LOCAL_CONTENT_SHARD) ->
    ok;
ensure_shard(Shard) ->
    NotRunning = whereis(Shard) =:= undefined,
    case NotRunning andalso mria_shards_sup:start_shard(Shard) of
        false                         -> ok;
        {ok, _}                       -> ok;
        {error, already_present}      -> ok;
        {error, {already_started, _}} -> ok;
        Err                           -> error({failed_to_create_shard, Shard, Err})
    end.

%% Subscribe the local process `Subscriber' to the shard's rlog server on
%% `RemoteNode'. The server is probed first; a failed probe is reported in
%% the same shape as an RPC failure.
-spec subscribe(mria_rlog:shard(), node(), pid(), mria_rlog_server:checkpoint()) ->
          { ok
          , _NeedBootstrap :: boolean()
          , _Agent :: pid()
          , [mria_schema:entry()]
          , integer()
          }
        | {badrpc | badtcp, term()}.
subscribe(Shard, RemoteNode, Subscriber, Checkpoint) ->
    case mria_rlog_server:probe(RemoteNode, Shard) of
        false ->
            {badrpc, {probe_failed, Shard}};
        true ->
            Self = {node(), Subscriber},
            mria_lib:rpc_call_nothrow({RemoteNode, Shard}, mria_rlog_server, subscribe,
                                      [Shard, Self, Checkpoint])
    end.

%% @doc Get version of Mria protocol running on the node
-spec get_protocol_version() -> integer().
get_protocol_version() ->
    %% Should be increased on incompatible changes:
    %%
    %% Changelog:
    %%
    %% 0 -> 1: Add clear_table message to the batch message of the
    %% bootstrapper.
    %% 1 -> 2: Add `{clear_table, Tab, Pattern}` op to support
    %% `mnesia:match_delete/2` API extension.
    2.

%% Commit hook: forward the commit record to the rlog server of the shard it
%% belongs to; commits that map to no shard are ignored.
intercept_trans(Tid, Commit) ->
    ?tp(mria_rlog_intercept_trans, Commit#{tid => Tid}),
    Shard = detect_shard(Commit),
    if
        Shard =:= undefined -> ok;
        true                -> mria_rlog_server:dispatch(Shard, Tid, Commit)
    end.

%% Assuming that all ops belong to one shard:
%% TODO: Handle local content tables more soundly.
%% Find the shard of a commit record by inspecting a single operation: the
%% first op of the first non-empty list among `ram_copies', `disc_copies',
%% `disc_only_copies' and `ext' (checked in this order). Returns `undefined'
%% when no clause matches.
detect_shard(#{ram_copies := [Op | _]}) ->
    do_detect_shard(Op);
detect_shard(#{disc_copies := [Op | _]}) ->
    do_detect_shard(Op);
detect_shard(#{disc_only_copies := [Op | _]}) ->
    do_detect_shard(Op);
detect_shard(#{ext := [{ext_copies, [{_, Op}]} | _]}) ->
    do_detect_ext_shard(Op);
detect_shard(_) ->
    undefined.

%% A write to the `?rlog_sync' table carries its shard inside the record;
%% any other op is resolved through the table name.
do_detect_ext_shard({{?rlog_sync, _Key}, #?rlog_sync{shard = Shard}, _Operation}) ->
    Shard;
do_detect_ext_shard(Op) ->
    do_detect_shard(Op).

%% Map the op's table to its shard (reverse lookup in `mria_config').
do_detect_shard({{Tab, _Key}, _Value, _Operation}) ->
    mria_config:shard_rlookup(Tab).

%% On core nodes, install `intercept_trans/2' as the mnesia `post_commit'
%% hook; other nodes need no setup.
-spec init() -> ok.
init() ->
    case mria_config:whoami() of
        core ->
            mnesia_hook:register_hook(post_commit, fun ?MODULE:intercept_trans/2);
        _ ->
            ok
    end.

%% Counterpart of `init/0': remove the `post_commit' hook on core nodes.
cleanup() ->
    case mria_config:whoami() of
        core ->
            mnesia_hook:unregister_hook(post_commit);
        _ ->
            ok
    end.

--------------------------------------------------------------------------------
/src/mria_rlog.hrl:
--------------------------------------------------------------------------------
-ifndef(MRIA_RLOG_HRL).
-define(MRIA_RLOG_HRL, true).

-define(mria_meta_shard, '$mria_meta_shard').
-define(schema, mria_schema).
-define(rlog_sync, '$mria_rlog_sync').

%% Note to self: don't forget to update all the match specs in
%% `mria_schema' module when changing fields in this record
-record(?schema,
        { mnesia_table
        , shard
        , storage
        , config
        }).

%% Marker record written to trigger replication of a synchronous
%% transaction; `reply_to' identifies the awaiting caller.
-record(?rlog_sync, {reply_to, shard, extra = #{}}).

-define(LOCAL_CONTENT_SHARD, undefined).

%% Classify an activity id by the tag in its first element.
-define(IS_DIRTY(TID), (element(1, (TID)) =:= dirty)).

-define(IS_TRANS(TID), (element(1, (TID)) =:= tid)).

-define(unexpected_event_kind, "Mria worker received unexpected event").
%% Emit a warning-level trace point for an event a worker did not expect.
%% `Params' must evaluate to a map; the calling module and function are
%% added to it.
-define(unexpected_event_tp(Params),
        ?tp(warning, ?unexpected_event_kind,
            (begin Params end)#{ process => ?MODULE
                               , callback => ?FUNCTION_NAME
                               })).

%% Debug-level trace point emitted from `terminate' callbacks.
-define(terminate_tp,
        ?tp(debug, mria_worker_terminate, #{process => ?MODULE, callback => terminate})).

%% Messages

%% One replicated transaction together with its sender and sequence number.
-record(entry,
        { sender :: pid()
        , seqno  :: mria_rlog:seqno()
        , tx     :: mria_rlog:tx()
        }).

%% Acknowledgement sent by the importer worker after applying a batch.
-record(imported,
        { ref :: reference()
        }).

-record(bootstrap_complete,
        { sender :: pid()
        , checkpoint
        }).

-define(ERL_RPC, rpc).
-define(GEN_RPC, gen_rpc).
-define(DEFAULT_RPC_MODULE, ?ERL_RPC).

-define(TRANSPORT_ERL_DISTR, distr).
-define(TRANSPORT_GEN_RPC, gen_rpc).
-define(DEFAULT_SHARD_TRANSPORT, ?TRANSPORT_ERL_DISTR).

-endif.

--------------------------------------------------------------------------------
/src/mria_rlog_agent.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% @doc This module implements a gen_statem which pushes rlogs to
%% a remote node.
%%
%% All sends are done as `gen_rpc' calls to the replicant node.

-module(mria_rlog_agent).

-behaviour(gen_statem).

%% API:
-export([start_link/3, stop/1, dispatch/3]).

%% gen_statem callbacks:
-export([init/1, terminate/3, code_change/4, callback_mode/0, handle_event/4]).

-include("mria_rlog.hrl").
-include_lib("snabbkaffe/include/trace.hrl").

%% Define macros for each state to prevent typos:
-define(catchup, catchup).
-define(switchover, switchover).
-define(normal, normal).

-type state() :: ?catchup | ?switchover | ?normal.

-record(d,
        { shard      :: mria_rlog:shard()
        , subscriber :: mria_lib:subscriber()
        , transport  :: mria_rlog:transport()
        }).

-type data() :: #d{}.

-type fsm_result() :: gen_statem:event_handler_result(state()).

%%--------------------------------------------------------------------
%% API functions
%%--------------------------------------------------------------------

%% Start an agent serving `Subscriber' of `Shard', linked to the caller.
start_link(Shard, Subscriber, ReplaySince) ->
    InitArg = {Shard, Subscriber, ReplaySince},
    gen_statem:start_link(?MODULE, InitArg, []).

%% Synchronously stop the agent. An agent that is already gone counts as
%% stopped.
stop(Pid) ->
    try gen_statem:call(Pid, stop, infinity) of
        Reply ->
            Reply
    catch
        exit : {noproc, _} ->
            %% race condition, the process exited
            %% before or during this call
            ok
    end.

%% Hand a transaction log entry over to the agent (plain message send).
-spec dispatch(pid(), mria_rlog:seqno(), mria_rlog:tx()) -> ok.
dispatch(AgentPid, SeqNo, TLOGEntry) ->
    _ = erlang:send(AgentPid, {trans, SeqNo, TLOGEntry}),
    ok.

%%--------------------------------------------------------------------
%% gen_statem callbacks
%%--------------------------------------------------------------------

callback_mode() ->
    [handle_event_function, state_enter].

-spec init({mria_rlog:shard(), mria_lib:subscriber(), integer()}) ->
          {ok, state(), data()}.
%% Set up the agent process: trap exits, keep the message queue off-heap,
%% attach logger metadata and resolve the shard's transport. The agent
%% starts directly in the `normal' state; the third element of the init
%% argument is not used here.
init({Shard, Subscriber, _ReplaySince}) ->
    process_flag(trap_exit, true),
    process_flag(message_queue_data, off_heap),
    logger:update_process_metadata(#{ domain => [mria, rlog, agent]
                                    , shard => Shard
                                    , subscriber => Subscriber
                                    }),
    D = #d{ shard          = Shard
          , subscriber     = Subscriber
          , transport      = mria_config:shard_transport(Shard)
          },
    ?tp(info, rlog_agent_started,
        #{ shard => Shard
         }),
    {ok, ?normal, D}.

%% Single event handler for all states. Clause order matters: the specific
%% clauses must precede the catch-all at the end.
-spec handle_event(gen_statem:event_type(), _EventContent, state(), data()) ->
          gen_statem:event_handler_result(state()).
%% A transaction dispatched by `dispatch/3' (only handled in `normal'):
handle_event(info, {trans, SeqNo, TLOGEntry}, ?normal, D) ->
    handle_mnesia_event(SeqNo, TLOGEntry, D);
%% Common actions:
handle_event({call, From}, stop, State, D) ->
    handle_stop(State, From, D);
handle_event(enter, OldState, State, D) ->
    handle_state_trans(OldState, State, D);
%% Exit signal from the subscriber's pid (matched against the pid stored in
%% the agent's data): stop the agent with a `shutdown' reason.
handle_event(info, {'EXIT', SubscriberPid, Reason}, _State,
             #d{subscriber = {Node, SubscriberPid}, shard = Shard}) ->
    ?tp(info, rlog_agent_subscriber_died,
        #{ reason => Reason
         , shard => Shard
         , subscriber => {Node, SubscriberPid}
         }),
    {stop, {shutdown, {subscriber_died, Reason}}};
handle_event(EventType, Event, State, D) ->
    handle_unknown(EventType, Event, State, D).

%% No state conversion is performed on code upgrade.
code_change(_OldVsn, State, Data, _Extra) ->
    {ok, State, Data}.

%% Only emits trace points; there is nothing to clean up.
terminate(Reason, _State, Data) ->
    ?terminate_tp,
    ?tp(debug, rlog_agent_terminating,
        #{ subscriber => Data#d.subscriber
         , shard => Data#d.shard
         , reason => Reason
         }),
    ok.
%%--------------------------------------------------------------------
%% Internal functions
%%--------------------------------------------------------------------

%% Reply `ok' to the caller of `stop/1' and stop with reason `normal'.
handle_stop(_State, From, _Data) ->
    ?tp(rlog_agent_stop,
        #{ state => _State
         , data => _Data
         }),
    {stop_and_reply, normal, {reply, From, ok}}.

%% Report an event that no other clause handled, and ignore it.
handle_unknown(EventType, Event, State, Data) ->
    ?unexpected_event_tp(#{ event_type => EventType
                          , event => Event
                          , state => State
                          , data => Data
                          }),
    keep_state_and_data.

%% State-enter callback: only traces the transition.
handle_state_trans(_OldState, _State, _Data) ->
    ?tp(rlog_agent_state_change,
        #{ from => _OldState
         , to => _State
         }),
    keep_state_and_data.

%%================================================================================
%% Internal functions
%%================================================================================

%% Wrap the transaction into an `#entry{}' stamped with this agent's pid and
%% the sequence number, and push it to the subscriber over the shard's
%% transport. The push must return `ok', otherwise the agent crashes
%% (badmatch).
-spec handle_mnesia_event(mria_rlog:seqno(), mria_rlog:tx(), data()) ->
          fsm_result().
handle_mnesia_event(SeqNo, Tx = {_Tid, _Ops}, D = #d{shard = Shard}) ->
    Transport = D#d.transport,
    ?tp(rlog_realtime_op,
        #{ ops         => _Ops
         , activity_id => _Tid
         , agent       => self()
         , seqno       => SeqNo
         }),
    TLOGEntry = #entry{ sender = self()
                      , seqno  = SeqNo
                      , tx     = Tx
                      },
    ok = mria_rlog_replica:push_tlog_entry(Transport, Shard, D#d.subscriber, TLOGEntry),
    keep_state_and_data.

--------------------------------------------------------------------------------
/src/mria_rlog_sup.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2021, 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% Top level supervisor for the RLOG tree, that starts the persistent
%% processes.
-module(mria_rlog_sup).

-behaviour(supervisor).

-export([init/1, start_link/0]).

-define(SUPERVISOR, ?MODULE).

-include("mria_rlog.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").

%%================================================================================
%% API functions
%%================================================================================

%% Start the locally registered supervisor; the node's role is read once
%% here and selects the `init/1' clause.
start_link() ->
    Role = mria_rlog:role(),
    supervisor:start_link({local, ?SUPERVISOR}, ?MODULE, Role).

%%================================================================================
%% supervisor callbacks
%%================================================================================

%% Core nodes run only the shards supervisor; replicants additionally run
%% `mria_lb', listed (and therefore started) before the shards supervisor.
init(core) ->
    SupFlags = #{ strategy => one_for_all
                , intensity => 1
                , period => 1
                },
    Children = [child_sup()],
    {ok, {SupFlags, Children}};
init(replicant) ->
    SupFlags = #{ strategy => one_for_all
                , intensity => 1
                , period => 1
                },
    Children = [core_node_lb(), child_sup()],
    {ok, {SupFlags, Children}}.
%%================================================================================
%% Internal functions
%%================================================================================

%% Child spec of the `mria_lb' worker (used on replicants only).
core_node_lb() ->
    #{ id => mria_lb
     , start => {mria_lb, start_link, []}
     , restart => permanent
     , shutdown => 5000
     , type => worker
     }.

%% Child spec of the supervisor that manages the shards.
child_sup() ->
    #{ id => mria_shards_sup
     , start => {mria_shards_sup, start_link, []}
     , restart => permanent
     , shutdown => infinity
     , type => supervisor
     }.

--------------------------------------------------------------------------------
/src/mria_shards_sup.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% Supervisor that manages the shards
-module(mria_shards_sup).

-behaviour(supervisor).

-export([init/1, start_link/0, find_shard/1, start_shard/1, restart_shard/2]).

-define(SUPERVISOR, ?MODULE).

-include("mria_rlog.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
%%================================================================================
%% API functions
%%================================================================================

%% Start the locally registered shards supervisor. Shards listed in the
%% `rlog_startup_shards' application variable are started together with it.
start_link() ->
    StartupShards = application:get_env(mria, rlog_startup_shards, []),
    supervisor:start_link({local, ?SUPERVISOR}, ?MODULE, [StartupShards]).

%% Look up the supervisor process of a shard.
-spec find_shard(mria_rlog:shard()) -> {ok, pid()} | undefined.
find_shard(Shard) ->
    mria_lib:sup_child_pid(?SUPERVISOR, Shard).

%% @doc Add shard dynamically
-spec start_shard(mria_rlog:shard()) -> {ok, pid()}
                                      | {error, _}.
start_shard(Shard) ->
    case whereis(?SUPERVISOR) =:= undefined of
        true ->
            %% The supervisor is not registered yet: retry after a pause.
            %% FIXME: communicate via CVAR instead
            timer:sleep(100),
            start_shard(Shard);
        false ->
            supervisor:start_child(?SUPERVISOR, shard_sup(Shard))
    end.

%% @doc Restart a shard
-spec restart_shard(mria_rlog:shard(), _Reason) -> ok.
restart_shard(Shard, Reason) ->
    ?tp(notice, "Restarting RLOG shard",
        #{ shard  => Shard
         , reason => Reason
         }),
    {ok, _} = supervisor:restart_child(?SUPERVISOR, Shard),
    ok.

%%================================================================================
%% supervisor callbacks
%%================================================================================

%% The meta shard is always started, followed by the configured shards.
init([Shards]) ->
    ChildSpecs = [shard_sup(Shard) || Shard <- [?mria_meta_shard | Shards]],
    %% Shards should be restarted individually to avoid bootstrapping
    %% of too many replicants simultaneously, hence `one_for_one':
    Flags = #{ strategy  => one_for_one
             , intensity => 100
             , period    => 1
             },
    {ok, {Flags, ChildSpecs}}.
%%================================================================================
%% Internal functions
%%================================================================================

%% Build the child spec of a shard. The supervisor module that is started
%% depends on the role of the local node; the shard name is the child id.
shard_sup(Shard) ->
    Start = case mria_rlog:role() of
                core      -> {mria_core_shard_sup, start_link, [Shard]};
                replicant -> {mria_replicant_shard_sup, start_link, [Shard]}
            end,
    #{ id => Shard
     , start => Start
     , restart => permanent
     , shutdown => infinity
     , type => supervisor
     }.

--------------------------------------------------------------------------------
/src/mria_sup.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2019-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

-module(mria_sup).

-behaviour(supervisor).

-export([start_link/0, stop/0, is_running/0]).

-export([init/1, post_init/1]).

-include("mria_rlog.hrl").
-include_lib("snabbkaffe/include/trace.hrl").

%% Start the locally registered top-level supervisor; the configured
%% backend is passed to `init/1'.
start_link() ->
    Backend = mria_rlog:backend(),
    supervisor:start_link({local, ?MODULE}, ?MODULE, Backend).
%% Shut down the top-level supervisor.
stop() ->
    mria_lib:shutdown_process(?MODULE).

%% `true' when a process is registered under this module's name.
is_running() ->
    Registered = whereis(?MODULE),
    is_pid(Registered).

%% Body of the `post_init' child. It acknowledges the start right away so
%% that the supervisor is not blocked, then waits for the meta shard.
post_init(Parent) ->
    proc_lib:init_ack(Parent, {ok, self()}),
    %% Exec the start callback, but first make sure the schema is in
    %% sync:
    ok = mria_rlog:wait_for_shards([?mria_meta_shard], infinity),
    ?tp(notice, "Mria is running", #{}),
    mria_lib:exec_callback(start).

%% Both backends share the same supervisor flags; `rlog' additionally runs
%% the `mria_rlog_sup' subtree.
-spec init(mria:backend()) -> {ok, {supervisor:sup_flags(), [supervisor:child_spec()]}}.
init(mnesia) ->
    Flags = #{strategy => one_for_all, intensity => 0, period => 3600},
    Children = [ child(mria_status, worker)
               , child(mria_schema, worker)
               , child(mria_membership_sup, supervisor)
               , post_init_child()
               ],
    {ok, {Flags, Children}};
init(rlog) ->
    Flags = #{strategy => one_for_all, intensity => 0, period => 3600},
    Children = [ child(mria_status, worker)
               , child(mria_schema, worker)
               , child(mria_membership_sup, supervisor)
               , child(mria_rlog_sup, supervisor)
               , post_init_child()
               ],
    {ok, {Flags, Children}}.

%% Permanent child started with `Mod:start_link()'. Workers get a 5 second
%% shutdown timeout, supervisors an infinite one.
child(Mod, worker) ->
    child_spec(Mod, worker, 5000);
child(Mod, supervisor) ->
    child_spec(Mod, supervisor, infinity).

child_spec(Mod, Type, Shutdown) ->
    #{ id       => Mod
     , start    => {Mod, start_link, []}
     , restart  => permanent
     , shutdown => Shutdown
     , type     => Type
     , modules  => [Mod]
     }.

%% Simple worker process that runs the start callback. We put it into
%% the supervision tree to make sure it doesn't outlive mria app
post_init_child() ->
    Parent = self(),
    #{ id       => post_init
     , start    => {proc_lib, start_link, [?MODULE, post_init, [Parent]]}
     , restart  => temporary
     , shutdown => 5_000
     , type     => worker
     , modules  => []
     }.
96 | -------------------------------------------------------------------------------- /src/mria_upstream.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | %% @doc This module contains functions for updating the upstream 18 | %% of the table. 19 | %% 20 | %% Upstream means a core node or the local node if we are talking 21 | %% about `local_content' shard. 22 | %% 23 | %% NOTE: All of these functions can be called remotely via RPC 24 | -module(mria_upstream). 25 | 26 | %% API: 27 | %% Internal exports 28 | -export([ transactional_wrapper/3 29 | , sync_dummy_wrapper/2 30 | , dirty_wrapper/4 31 | , dirty_write_sync/2 32 | ]). 33 | 34 | -export_type([]). 35 | 36 | -include("mria_rlog.hrl"). 

%%================================================================================
%% Type declarations
%%================================================================================

%%================================================================================
%% API functions
%%================================================================================

%% Run `Fun(Args...)' inside an mnesia transaction once `Shard' is up.
%% Calling it from within another mnesia activity raises
%% `nested_transaction'. The pid registered under `Shard' is sampled
%% before the transaction starts and passed on to the shard check.
-spec transactional_wrapper(mria_rlog:shard(), fun(), list()) -> mria:t_result(term()).
transactional_wrapper(Shard, Fun, Args) ->
    ServerPidBefore = whereis(Shard),
    ensure_no_transaction(),
    mria_rlog:wait_for_shards([Shard], infinity),
    TxBody = fun() ->
                     Result = apply(Fun, Args),
                     {_TID, TxStore} = mria_mnesia:get_internals(),
                     ensure_no_ops_outside_shard(TxStore, Shard, ServerPidBefore),
                     Result
             end,
    mnesia:transaction(TxBody).

%% @doc Dirty-write the special `ReplyTo' record to rlog_sync (a
%% null_copies table) for the sole purpose of triggering its
%% replication. mria:sync_transaction/2,3,4 uses it as a 'retry'
%% mechanism: when the reply to the original sync_transaction may have
%% been lost due to a failure, an RPC to this dummy function lets the
%% caller wait for the dummy's replication, which in turn guarantees that
%% the original transaction has been replicated as well.
-spec sync_dummy_wrapper(mria_rlog:shard(), mria_rlog:sync_reply_to()) -> mria:t_result(term()).
sync_dummy_wrapper(Shard, ReplyTo) ->
    mria_rlog:wait_for_shards([Shard], infinity),
    %% The return values imitate those of an mnesia transaction:
    try
        ok = mnesia:dirty_write(ReplyTo),
        {atomic, ok}
    catch Class : Reason ->
            {aborted, {Class, Reason}}
    end.

%% @doc Perform a synchronous dirty write of `Record' to `Table'
-spec dirty_write_sync(mria:table(), tuple()) -> ok.
dirty_write_sync(Table, Record) ->
    Write = fun() -> mnesia:write(Table, Record, write) end,
    mnesia:sync_dirty(Write).

%% @doc Apply `Module:Function' to `[Table | Args]' and tag the outcome.
%%
%% Returns `{ok, Result}' on normal return. Any exception is caught and
%% returned as `{Class, Reason}', where `Class' is the exception class
%% (`error', `exit' or `throw'), so the caller decides how to handle it.
-spec dirty_wrapper(module(), atom(), mria:table(), list()) ->
          {ok | error | exit | throw, term()}.
dirty_wrapper(Module, Function, Table, Args) ->
    try apply(Module, Function, [Table|Args]) of
        Result -> {ok, Result}
    catch
        EC : Err ->
            {EC, Err}
    end.

%%================================================================================
%% Internal functions
%%================================================================================

%% Raise `nested_transaction' when the calling process is already inside
%% an mnesia activity.
ensure_no_transaction() ->
    case mnesia:get_activity_id() of
        undefined -> ok;
        _         -> error(nested_transaction)
    end.

%% Run the shard check only when strict mode is enabled in the config.
ensure_no_ops_outside_shard(TxStore, Shard, OldServerPid) ->
    case mria_config:strict_mode() of
        true  -> do_ensure_no_ops_outside_shard(TxStore, Shard, OldServerPid);
        false -> ok
    end.

%% Walk over the tables referenced in the transaction store and abort
%% the transaction when one of them (other than `?rlog_sync') does not
%% map to `Shard':
%%
%%  - with `invalid_transaction' if the pid registered under `Shard' is
%%    still `OldServerPid';
%%  - with `retry' (carrying the old and the new pid) if a different
%%    term is now returned by `whereis(Shard)'.
do_ensure_no_ops_outside_shard(TxStore, Shard, OldServerPid) ->
    Tables = ets:match(TxStore, {{'$1', '_'}, '_', '_'}),
    lists:foreach( fun([?rlog_sync]) -> ok;
                      ([Table]) ->
                           case mria_config:shard_rlookup(Table) =:= Shard of
                               true  -> ok;
                               false -> case whereis(Shard) of
                                            OldServerPid ->
                                                mnesia:abort({invalid_transaction, Table, Shard});
                                            ServerPid ->
                                                mnesia:abort({retry, {OldServerPid, ServerPid}})
                                        end
                           end
                   end
                 , Tables
                 ),
    ok.

--------------------------------------------------------------------------------
/test/concuerror_tests.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2021-2025 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% NOTE: Concuerror doesn't pick up testcases automatically, add them
%% to the Makefile explicitly
-module(concuerror_tests).

-include_lib("eunit/include/eunit.hrl").
-define(CONCUERROR, true).
-include_lib("snabbkaffe/include/snabbkaffe.hrl").

%% Note: the number of interleavings that Concuerror has to explore
%% grows _exponentially_ with the number of concurrent processes and
%% the number of I/O operations that they perform. So all tests in
%% this module should be kept as short and simple as possible and only
%% verify a single property.

%% With timeout=infinity, waiting for shards must always return `ok',
%% both on the first and on a repeated call, and must not leave any
%% message behind in the mailbox.
wait_for_shards_inf_test() ->
    optvar:init(),
    Shards = [foo, bar],
    try
        %% One notifier process per shard:
        lists:foreach(
          fun(Shard) ->
                  spawn(fun() ->
                                catch mria_status:notify_shard_up(Shard, self())
                        end)
          end,
          Shards),
        ?assertMatch(ok, mria_status:wait_for_shards(Shards, infinity)),
        ?assertMatch(ok, mria_status:wait_for_shards(Shards, infinity)),
        ?assertMatch([], flush())
    after
        cleanup()
    end.

%% Events published under different tags must not leave garbage
%% messages in the waiting process' mailbox.
notify_different_tags_test() ->
    optvar:init(),
    try
        spawn(fun() ->
                      catch mria_status:notify_shard_up(foo, self())
              end),
        spawn(fun() ->
                      catch mria_status:notify_core_node_up(foo, node())
              end),
        ?assertMatch(ok, mria_status:wait_for_shards([foo], infinity)),
        ?assertMatch([], flush())
    after
        cleanup()
    end.

%% Waiting for the core node of a shard returns the node that has been
%% announced by the notifier process.
get_core_node_test() ->
    optvar:init(),
    try
        Node = node(),
        spawn(fun() ->
                      catch mria_status:notify_core_node_up(foo, Node)
              end),
        ?assertMatch({ok, Node}, mria_status:replica_get_core_node(foo, infinity)),
        ?assertMatch([], flush())
    after
        cleanup()
    end.

%% Waiting for shards with a finite timeout must never hang forever:
%% the call either succeeds or reports a non-empty subset of the
%% requested shards as timed out.
wait_for_shards_timeout_test() ->
    optvar:init(),
    Shards = [foo, bar],
    try
        lists:foreach(
          fun(Shard) ->
                  spawn(fun() ->
                                catch mria_status:notify_shard_up(Shard, self())
                        end)
          end,
          Shards),
        case mria_status:wait_for_shards(Shards, 100) of
            ok ->
                %% Once it has succeeded, a second call must succeed too:
                ?assertMatch(ok, mria_status:wait_for_shards(Shards, 100));
            {timeout, TimedOut} ->
                case lists:sort(TimedOut) of
                    [bar, foo] -> ok;
                    [foo]      -> ok;
                    [bar]      -> ok
                end
        end,
        ?assertMatch([], flush())
    after
        %% Hack: set the variables, so concuerror doesn't report the
        %% waker processes as "deadlocked":
        mria_status:notify_shard_up(foo, self()),
        mria_status:notify_shard_up(bar, self()),
        cleanup()
    end.

%% Waiting for events must never turn into an infinite wait, even when
%% the optvar storage is stopped concurrently.
wait_for_shards_crash_test() ->
    optvar:init(),
    try
        spawn(fun() ->
                      catch mria_status:notify_shard_up(foo, node())
              end),
        spawn(fun() ->
                      catch optvar:stop()
              end),
        %% Any of the three outcomes below is acceptable:
        try mria_status:wait_for_shards([foo], 100) of
            ok ->
                %% After one success the second call must return `ok' too:
                ?assertMatch(ok, mria_status:wait_for_shards([foo], 100));
            {timeout, _Shards} ->
                ok
        catch
            error:_ -> ok
        end,
        ?assertMatch([], flush())
    after
        catch cleanup()
    end.

%% Verify dirty bootstrap procedure (simplified).
%% TODO: use real bootstrapper module?
dirty_bootstrap_test() ->
    SrcTab = ets:new(source, [public, named_table]),
    DstTab = ets:new(replica, [public, named_table]),
    %% Initial contents of the source table:
    ets:insert(source, [{1, 1}, {2, 2}, {3, 3}]),
    try
        register(testcase, self()),
        %% The "replica" process stands in for two processes of the
        %% real code: the bootstrapper client, which imports batches,
        %% and the replica process, which saves tlogs to the replayq
        %% meanwhile. Here the tlogs are simply buffered in the
        %% message queue.
        ReplicaPid = spawn_link(fun replica/0),
        register(replica, ReplicaPid),
        %% The "importer" process plays the role of mnesia_tm:
        spawn_link(fun importer/0),
        %% The "bootstrapper" process plays the role of the
        %% bootstrapper server:
        spawn_link(fun bootstrapper/0),
        receive
            done ->
                Expected = lists:sort(ets:tab2list(source)),
                Received = lists:sort(ets:tab2list(replica)),
                ?assertEqual(Expected, Received)
        end
    after
        ets:delete(SrcTab),
        ets:delete(DstTab)
    end.

%% Apply a fixed list of operations to the `source' table and forward
%% the resulting (transformed) operation to the replica process.
importer() ->
    Ops = [ {write, 3, 3}
          , {write, 4, 4}
          , {update_counter, 1, 1}
          , {write, 4, 5}
          , {delete, 2}
          , {write, 5, 5}
          , {update_counter, 4, 1}
          , {write, 4, 3}
          , {delete, 5}
          ],
    lists:foreach(fun(Op) ->
                          %% Imitate an mnesia event. It goes straight
                          %% to the replica process, bypassing the
                          %% agent:
                          replica ! {tlog, import_op(source, Op)}
                  end,
                  Ops),
    replica ! last_trans.

%% First stage of the replica: store bootstrap records until the
%% bootstrapper signals completion, then switch to replaying tlogs.
replica() ->
    receive
        {bootstrap, Key, Val} ->
            ets:insert(replica, {Key, Val}),
            replica();
        bootstrap_done ->
            replay()
    end.

%% Second stage of the replica: apply buffered tlog entries until the
%% importer's end marker arrives, then notify the testcase process.
replay() ->
    receive
        {tlog, Op} ->
            import_op(replica, Op),
            replay();
        last_trans ->
            testcase ! done
    end.

%% Apply one operation to `Tab' and return the operation to replicate.
%% Writes and deletes are returned unchanged; a counter update is
%% translated into a write of the new value (or a delete when the key
%% is no longer present at lookup time).
import_op(Tab, Op) ->
    case Op of
        {write, K, V} ->
            ets:insert(Tab, {K, V}),
            Op;
        {delete, K} ->
            ets:delete(Tab, K),
            Op;
        {update_counter, K, Incr} ->
            ets:update_counter(Tab, K, Incr),
            case ets:lookup(Tab, K) of
                [{_, Current}] -> {write, K, Current};
                []             -> {delete, K}
            end
    end.

%% Take the list of keys of the `source' table first, then look up and
%% send every record that still exists, followed by the end marker.
bootstrapper() ->
    {Keys, _Vals} = lists:unzip(ets:tab2list(source)),
    lists:foreach(fun(K) ->
                          [replica ! {bootstrap, K, V} || {_, V} <- ets:lookup(source, K)]
                  end,
                  Keys),
    replica ! bootstrap_done.

%% Drain the mailbox. Returns the messages in arrival order once no
%% message has shown up for 100ms.
flush() ->
    flush_acc([]).

flush_acc(Acc) ->
    receive
        Msg -> flush_acc([Msg | Acc])
    after 100 ->
            lists:reverse(Acc)
    end.

%% Stop optvar, unless running under concuerror: there the cleanup only
%% adds interleavings, so it is skipped.
cleanup() ->
    cleanup(is_concuerror()).

cleanup(true)  -> ok;
cleanup(false) -> catch optvar:stop().

%% Hack to detect if running under concuerror:
is_concuerror() ->
    case code:is_loaded(concuerror) of
        false -> false;
        _     -> true
    end.

--------------------------------------------------------------------------------
/test/mria_autoclean_SUITE.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2019-2021, 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

-module(mria_autoclean_SUITE).

-compile(export_all).
-compile(nowarn_export_all).

all() ->
    [t_autoclean].

init_per_suite(Cfg) ->
    mria_ct:start_dist(),
    Cfg.

end_per_suite(_Cfg) ->
    ok.

%% Start two core nodes with `cluster_autoclean' set to 1000, stop the
%% second one, and expect it to be gone from the first node's list of
%% running nodes two seconds later.
t_autoclean(_) ->
    Cluster = mria_ct:cluster([core, core], [{mria, cluster_autoclean, 1000}]),
    try
        [Survivor, Victim] = mria_ct:start_cluster(mria, Cluster),
        [Survivor, Victim] = rpc:call(Survivor, mria, info, [running_nodes]),
        mria_ct:stop_slave(Victim),
        ok = timer:sleep(2000),
        [Survivor] = rpc:call(Survivor, mria, info, [running_nodes])
    after
        mria_ct:teardown_cluster(Cluster)
    end.
43 | -------------------------------------------------------------------------------- /test/mria_autoheal_SUITE.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2019-2021, 2023 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | -module(mria_autoheal_SUITE). 18 | 19 | -compile(nowarn_export_all). 20 | -compile(export_all). 21 | 22 | -compile(nowarn_underscore_match). 23 | 24 | -include_lib("snabbkaffe/include/ct_boilerplate.hrl"). 

%% Four core nodes: break the connection between the last two, verify
%% the resulting views, then wait until every node reports the full
%% cluster as running again.
t_autoheal(Config) when is_list(Config) ->
    Cluster = mria_ct:cluster([core, core, core, core], [{mria, cluster_autoheal, 200}]),
    ?check_trace(
       #{timetrap => 25000},
       try
           AllNodes = [A, B, C, D] = mria_ct:start_cluster(mria, Cluster),
           %% Simulate netsplit
           true = rpc:cast(D, erlang, disconnect_node, [C]),
           ok = timer:sleep(1000),
           %% SplitView: [[A,B], [C], [D]]
           ?assertMatch({[A, B], [C, D]}, view(A)),
           ?assertMatch({[A, B], [C, D]}, view(B)),
           ?assertMatch({[C], [A, B, D]}, view(C)),
           ?assertMatch({[D], [A, B, C]}, view(D)),
           %% Autoheal is expected to kick in without any help:
           ?retry(1000, 20,
                  lists:foreach(
                    fun(Node) ->
                            ?assertMatch({AllNodes, []}, view(Node))
                    end,
                    AllNodes)),
           AllNodes
       after
           ok = mria_ct:teardown_cluster(Cluster)
       end,
       [fun ?MODULE:prop_callbacks/1]).

%% Same idea as the plain autoheal test, but the cluster also contains
%% two replicants; after the heal every node must report all five
%% nodes as running.
t_autoheal_with_replicants(Config) when is_list(Config) ->
    snabbkaffe:fix_ct_logging(),
    Roles = [core, core, core, replicant, replicant],
    Env = [ {mria, cluster_autoheal, 200}
          | mria_mnesia_test_util:common_env()
          ],
    Cluster = mria_ct:cluster(Roles, Env),
    ?check_trace(
       #{timetrap => 45_000},
       try
           AllNodes = [Core1, Core2 | _Rest] = [_, _, _, _, _] =
               mria_ct:start_cluster(mria, Cluster),
           ok = mria_mnesia_test_util:wait_tables(AllNodes),
           %% Simulate netsplit:
           true = rpc:cast(Core1, erlang, disconnect_node, [Core2]),
           %% The split has to be noticed first:
           ?block_until(#{?snk_kind := mria_autoheal_partition}),
           %% Autoheal is expected to kick in without any help:
           ?retry(1000, 20,
                  lists:foreach(
                    fun(Node) ->
                            AllNodes = rpc:call(Node, mria, info, [running_nodes])
                    end,
                    AllNodes)),
           AllNodes
       after
           ok = mria_ct:teardown_cluster(Cluster)
       end,
       [fun ?MODULE:prop_callbacks/1]).

%% Five core nodes: the last two are disconnected from the first one
%% and the fifth is stopped. The four remaining nodes must converge on
%% a view where they are running and the fifth is stopped.
t_autoheal_majority_reachable(Config) when is_list(Config) ->
    Cluster = mria_ct:cluster([core, core, core, core, core], [{mria, cluster_autoheal, 200}]),
    ?check_trace(
       #{timetrap => 25000},
       try
           AllNodes = [N1, N2, N3, N4, N5] = mria_ct:start_cluster(mria, Cluster),
           %% Simulate netsplit
           true = rpc:cast(N4, erlang, disconnect_node, [N1]),
           true = rpc:cast(N5, erlang, disconnect_node, [N1]),
           ok = mria_ct:stop_slave(N5),
           ok = timer:sleep(1000),
           AliveMajorityNodes = [N1, N2, N3, N4],
           %% Autoheal is expected to kick in without any help:
           ?retry(1000, 20,
                  lists:foreach(
                    fun(Node) ->
                            ?assertMatch({AliveMajorityNodes, [N5]}, view(Node))
                    end,
                    AliveMajorityNodes)),
           AllNodes
       after
           ok = mria_ct:teardown_cluster(lists:sublist(Cluster, 4))
       end,
       [fun ?MODULE:prop_callbacks/1]).

todo_t_reboot_rejoin(Config) when is_list(Config) -> %% FIXME: Flaky and somewhat broken, disable for now
    Env = [ {mria, cluster_autoheal, 200}
          , {mria, db_backend, rlog}
          , {mria, lb_poll_interval, 100}
          ],
    Cluster = mria_ct:cluster([core, core, replicant, replicant],
                              Env,
                              [{base_gen_rpc_port, 9001}]),
    ?check_trace(
       #{timetrap => 60_000},
       try
           AllNodes = [C1, C2, R1, R2] = mria_ct:start_cluster(node, Cluster),
           lists:foreach(
             fun(N) -> ?assertMatch(ok, mria_ct:rpc(N, mria, start, [])) end,
             AllNodes),
           lists:foreach(
             fun(N) -> ?assertMatch(ok, mria_ct:rpc(N, mria_transaction_gen, init, [])) end,
             AllNodes),
           lists:foreach(
             fun(N) -> mria_ct:rpc(N, mria, join, [C2]) end,
             [R1, R2]),
           ?tp(about_to_join, #{}),
           %% This makes C2 go through a full "power cycle".
           ?assertMatch(ok, rpc:call(C2, mria, join, [C1])),
           %% The rlog server of the shard died during the "power
           %% cycle" caused by the join, so make sure that it has been
           %% restarted before going on.
           timer:sleep(1000),
           ?assertMatch(ok, rpc:call(C2, mria_rlog, wait_for_shards, [[test_shard], 5000])),
           ?tp(notice, test_end, #{}),
           %% In the end there must be a single cluster.
           mria_mnesia_test_util:wait_full_replication(Cluster, infinity),
           AllNodes
       after
           ok = mria_ct:teardown_cluster(Cluster)
       end,
       fun([C1, C2, R1, R2], Trace0) ->
               %% Only look at the events between the two markers:
               {_, AfterJoinMark} = ?split_trace_at(#{?snk_kind := about_to_join}, Trace0),
               {Trace, _} = ?split_trace_at(#{?snk_kind := test_end}, AfterJoinMark),
               TraceC2 = ?of_node(C2, Trace),
               %% C1 joins C2
               ?assert(
                  ?strict_causality( #{ ?snk_kind := "Mria is restarting to join the cluster"
                                      , seed := C1
                                      }
                                   , #{ ?snk_kind := "Starting autoheal"
                                      }
                                   , TraceC2
                                   )),
               ?assert(
                  ?strict_causality( #{ ?snk_kind := "Starting autoheal"
                                      }
                                   , #{ ?snk_kind := "Mria has joined the cluster"
                                      , seed := C1
                                      , status := #{ running_nodes := [_, _]
                                                   }
                                      }
                                   , TraceC2
                                   )),
               ?assert(
                  ?strict_causality( #{ ?snk_kind := "Mria has joined the cluster"
                                      , status := #{ running_nodes := [_, _]
                                                   }
                                      }
                                   , #{ ?snk_kind := "starting_rlog_shard"
                                      , shard := test_shard
                                      }
                                   , TraceC2
                                   )),
               %% The replicants reboot and bootstrap the shard data
               assert_replicant_bootstrapped(R1, C2, Trace),
               assert_replicant_bootstrapped(R2, C2, Trace)
       end).

%% Trace check for a replicant `R' whose upstream core `C' is changing
%% clusters: the core's restart must be paired with the death of the
%% remote agent as seen by the replicant, and the replicant must go
%% through the bootstrap stages.
assert_replicant_bootstrapped(R, C, Trace) ->
    ?assert(
       ?strict_causality( #{ ?snk_kind := "Mria is restarting to join the cluster"
                           , ?snk_meta := #{ node := C }
                           }
                        , #{ ?snk_kind := "Remote RLOG agent died"
                           , ?snk_meta := #{ node := R, shard := test_shard }
                           }
                        , Trace
                        )),
    mria_rlog_props:replicant_bootstrap_stages(R, Trace),
    ok.

%% Trace property: the mria callbacks have been executed during the
%% heal, on the minority nodes and on nobody else.
prop_callbacks(Trace0) ->
    {BeforeTeardown, _} = ?split_trace_at(#{?snk_kind := teardown_cluster}, Trace0),
    {_, [HealEvent|AfterHeal]} = ?split_trace_at(#{?snk_kind := "Rebooting minority"}, BeforeTeardown),
    #{nodes := Minority} = HealEvent,
    %% Every minority node must have been stopped and started again:
    lists:foreach(
      fun(N) ->
              ?assert(
                 ?strict_causality( #{?snk_kind := mria_exec_callback, type := stop,  ?snk_meta := #{node := N}}
                                  , #{?snk_kind := mria_exec_callback, type := start, ?snk_meta := #{node := N}}
                                  , AfterHeal
                                  ))
      end,
      Minority),
    %% No node outside of the minority may have run a callback:
    Restarted = lists:usort([Node || #{?snk_kind := mria_exec_callback, ?snk_meta := #{node := Node}} <- AfterHeal]),
    ?assertEqual(lists:sort(Minority),
                 Restarted),
    true.

init_per_suite(Cfg) ->
    mria_ct:start_dist(),
    Cfg.

end_per_suite(_Cfg) ->
    ok.

%% The cluster as seen by `Node': sorted running and stopped nodes.
view(Node) ->
    Info = fun(What) -> rpc:call(Node, mria, info, [What]) end,
    Running = Info(running_nodes),
    Stopped = Info(stopped_nodes),
    {lists:sort(Running), lists:sort(Stopped)}.

--------------------------------------------------------------------------------
/test/mria_compatibility_suite.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% @doc Test interoperability of different mria releases
-module(mria_compatibility_suite).

-compile(export_all).
-compile(nowarn_export_all).
-compile(nowarn_underscore_match).

-include_lib("eunit/include/eunit.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-include("mria_rlog.hrl").

%% Old releases to test against: one entry per line printed by the
%% `git tag' command below.
releases() ->
    TagListing = os:cmd("git tag -l 0.3.*"),
    string:lexemes(TagListing, "\n").

%% All `{Release1, Release2, Testcase}' combinations to run: every old
%% release is paired with "master" in both orders, for each testcase
%% that `supported/3' accepts.
matrix() ->
    Testcases = [t_core_core], %% TODO: add core_replicant test
    [{First, Second, Testcase}
     || Old <- releases(),
        {First, Second} <- [{"master", Old}, {Old, "master"}],
        Testcase <- Testcases,
        supported(First, Second, Testcase)].

%% Filter used by `matrix/0'; currently accepts every combination.
supported(_Rel1, _Rel2, _TC) ->
    true.

%% Cluster two core nodes running the code of `Rel1' and `Rel2'
%% respectively and check that both see each other as db nodes.
t_core_core(Config, Rel1, Rel2) ->
    NodeSpecs = [ #{role => core, code_paths => code_paths(Config, Rel)}
                  || Rel <- [Rel1, Rel2]
                ],
    Cluster = mria_ct:cluster(NodeSpecs, []),
    ?check_trace(
       #{timetrap => 30_000},
       try
           [N1, N2] = mria_ct:start_cluster(mria, Cluster),
           verify_version(Rel1, N1),
           verify_version(Rel2, N2),
           %% Both nodes must agree on the cluster members:
           ?assertMatch([N1, N2], lists:sort(mria_ct:rpc(N1, mria_mnesia, db_nodes, []))),
           ?assertMatch([N1, N2], lists:sort(mria_ct:rpc(N2, mria_mnesia, db_nodes, [])))
       after
           mria_ct:teardown_cluster(Cluster),
           mria_ct:cleanup(?FUNCTION_NAME)
       end,
       common_checks()).

%% Paranoid check: make sure that the code running on the remote node
%% really is the expected release. Too many things can break this
%% assumption, cover compilation being one example. Nothing is checked
%% for "master".
verify_version("master", _Node) ->
    ok;
verify_version(Rel, Node) ->
    ?assertEqual({ok, Rel}, mria_ct:rpc(Node, application, get_key, [mria, vsn])),
    CompileInfo = mria_ct:rpc(Node, mria, module_info, [compile]),
    Src = proplists:get_value(source, CompileInfo),
    ?assertMatch({match, _}, re:run(Src, "oldrel/" ++ Rel)),
    ok.

%% Trace properties that every testcase of the matrix has to satisfy.
common_checks() ->
    [ fun mria_rlog_props:replicant_no_restarts/1
    , fun mria_rlog_props:no_unexpected_events/1
    , fun mria_rlog_props:no_split_brain/1
    ].

all() ->
    [t_run_all].

%% Build every old release once and keep the resulting directories in
%% the CT config under the `releases' key.
init_per_suite(Config) ->
    mria_ct:start_dist(),
    snabbkaffe:fix_ct_logging(),
    RootDir = root_dir(),
    Built = lists:map(fun(Rel) -> {Rel, prep_release(RootDir, Rel)} end,
                      releases()),
    [{releases, Built} | Config].

init_per_testcase(_, Cfg) ->
    Cfg.

end_per_testcase(TestCase, Cfg) ->
    mria_ct:cleanup(TestCase),
    snabbkaffe:stop(),
    Cfg.

end_per_suite(_Cfg) ->
    ok.

%% Run every `{Rel1, Rel2, Testcase}' combination of the matrix, log the
%% outcome of each one and fail if at least one of them has failed.
%%
%% Each combination yields a boolean (`true' on success, `false' when
%% the testcase raised), so that the final assertion checks the
%% collected results. A failure of one combination doesn't prevent the
%% remaining ones from running.
t_run_all(Config) ->
    Matrix = matrix(),
    Results =
        lists:map(
          fun({Rel1, Rel2, TC}) ->
                  try
                      ?MODULE:TC(Config, Rel1, Rel2),
                      logger:notice(asciiart:visible($%, "~p: ~p -> ~p: OK", [TC, Rel1, Rel2])),
                      true
                  catch
                      EC:Err:Stack ->
                          logger:error(asciiart:visible($!, "~p: ~p -> ~p: FAIL", [TC, Rel1, Rel2])),
                          %% Keep the cause of the failure in the logs:
                          logger:error("~p: ~p -> ~p failed with ~p:~p~n~p",
                                       [TC, Rel1, Rel2, EC, Err, Stack]),
                          false
                  end
          end,
          Matrix),
    %% NOTE: `?assert/2' treats its second argument as a comment, so the
    %% expression under test must be the first (and only) argument.
    ?assert(lists:all(fun(Passed) -> Passed end, Results)).

%% Code paths for a node running the given release: the paths of the
%% current checkout for "master"; otherwise the `ebin' directories found
%% under the release's build directory (taken from the `releases' entry
%% of the CT config), preceded by the rocksdb `ebin' of the current
%% checkout.
code_paths(_Config, "master") ->
    mria_ct:master_code_paths();
code_paths(Config, Rel) ->
    RelDir = proplists:get_value(Rel, proplists:get_value(releases, Config)),
    Rocksdb = filename:join(root_dir(), "_build/test/lib/rocksdb/ebin"),
    CodePaths = filelib:wildcard(filename:join(RelDir, "_build/test/lib/*/ebin")),
    [Rocksdb|CodePaths].

%% Build release `Tag' by running `scripts/build-old-rel' with `tag',
%% `tmp_dir' and `root_dir' passed via the environment. Crashes unless
%% the script exits with status 0. Returns the release directory.
prep_release(RootDir, Tag) ->
    TmpDir = filename:join([RootDir, "_build", "oldrel", Tag]),
    %% `filelib:ensure_dir/1' creates the parent directories of `TmpDir'
    %% only; presumably the build script creates `TmpDir' itself.
    ok = filelib:ensure_dir(TmpDir),
    0 = cmd( filename:join(RootDir, "scripts/build-old-rel")
           , [ {env, [ {"tag", Tag}
                     , {"tmp_dir", TmpDir}
                     , {"root_dir", RootDir}
                     ]}
             ]
           ),
    TmpDir.

%% Run executable `Cmd' as a port (with the extra port options `Opts')
%% and block until it exits. Returns the exit status.
cmd(Cmd, Opts) ->
    Port = open_port({spawn_executable, Cmd}, [exit_status, nouse_stdio|Opts]),
    receive
        {Port, {exit_status, Status}} ->
            Status
    end.

%% Top-level directory of the git checkout, as reported by git.
root_dir() ->
    [RootDir] = string:lexemes(os:cmd("git rev-parse --show-toplevel"), "\n"),
    RootDir.

--------------------------------------------------------------------------------
/test/mria_fault_tolerance_suite.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% Random error injection suite.
%%
%% Tests that use error injection should go here, to avoid polluting
%% the logs and scaring people
-module(mria_fault_tolerance_suite).

-compile(export_all).
-compile(nowarn_export_all).

-include_lib("eunit/include/eunit.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").

-compile(nowarn_underscore_match).

%% The list of testcases is produced by `mria_ct:all/1' for this module.
all() ->
    mria_ct:all(?MODULE).

init_per_suite(Cfg) ->
    mria_ct:start_dist(),
    Cfg.

end_per_suite(_Cfg) ->
    ok.

init_per_testcase(_TestCase, Cfg) ->
    Cfg.

%% Clean up after the testcase and stop snabbkaffe.
end_per_testcase(TestCase, Cfg) ->
    mria_ct:cleanup(TestCase),
    snabbkaffe:stop(),
    Cfg.

%% Inject random crashes into the rlog agent while a counter is being
%% incremented on a core node, then check that the replicant still
%% ends up with consistent data.
t_agent_restart(_) ->
    Cluster = mria_ct:cluster([core, core, replicant], mria_mnesia_test_util:common_env()),
    CounterKey = counter,
    ?check_trace(
       #{timetrap => 60000},
       try
           Nodes = [Core, _OtherCore, Replicant] = mria_ct:start_cluster(mria, Cluster),
           mria_mnesia_test_util:wait_tables(Nodes),
           mria_mnesia_test_util:stabilize(1000),
           %% From here on, trace points in the agent's domain may crash:
           Nemesis = ?inject_crash( #{?snk_meta := #{domain := [mria, rlog, agent|_]}}
                                  , snabbkaffe_nemesis:random_crash(0.4)
                                  ),
           ok = rpc:call(Core, mria_transaction_gen, counter, [CounterKey, 100, 100]),
           complete_test(Nemesis, Cluster, Nodes),
           Replicant
       after
           mria_ct:teardown_cluster(Cluster)
       end,
       fun(Replicant, Trace) ->
               ?assert(mria_rlog_props:replicant_bootstrap_stages(Replicant, Trace)),
               mria_rlog_props:counter_import_check(CounterKey, Replicant, Trace),
               %% More than one crash must have been injected:
               ?assert(length(?of_kind(snabbkaffe_crash, Trace)) > 1),
               mria_rlog_props:no_unexpected_events(Trace)
       end).

%% Check that an agent dies if its subscriber dies.
t_rlog_agent_linked_to_subscriber(_) ->
    Cluster = mria_ct:cluster([core, replicant], mria_mnesia_test_util:common_env()),
    ?check_trace(
       #{timetrap => 10000},
       try
           Nodes = [_Core, Replicant] = mria_ct:start_cluster(mria, Cluster),
           mria_mnesia_test_util:wait_tables(Nodes),
           %% Kill the process registered as `test_shard' on the
           %% replicant and wait until it is really gone:
           SubscriberPid = erpc:call(Replicant, erlang, whereis, [test_shard]),
           MRef = monitor(process, SubscriberPid),
           exit(SubscriberPid, kill),
           receive
               {'DOWN', MRef, process, SubscriberPid, killed} ->
                   ok
           end,
           %% A new agent has to show up, and the tables have to
           %% become available again:
           ?block_until(#{?snk_kind := rlog_agent_started}),
           mria_mnesia_test_util:wait_tables(Nodes),
           ?tp(test_end, #{}),
           {Replicant, SubscriberPid}
       after
           mria_ct:teardown_cluster(Cluster)
       end,
       fun(Subscriber, Trace0) ->
               %% Ignore everything that happened after the end marker:
               {Trace, _} = ?split_trace_at(#{?snk_kind := test_end}, Trace0),
               ?assertMatch(
                  [#{ ?snk_kind  := rlog_agent_terminating
                    , subscriber := Subscriber
                    , shard      := test_shard
                    , reason     := {shutdown, {subscriber_died, killed}}
                    }],
                  ?of_kind(rlog_agent_terminating, Trace)),
               mria_rlog_props:no_unexpected_events(Trace),
               ok
       end).
%% Counter workload on the first core while
%% `snabbkaffe_nemesis:random_crash(0.01)' is applied to the whole
%% `[mria, rlog | _]' trace domain.  `complete_test/3' removes the
%% fault and compares `test_tab' across the nodes.
t_rand_error_injection(_) ->
    Env = mria_mnesia_test_util:common_env(),
    Cluster = mria_ct:cluster([core, core, replicant], Env),
    CounterKey = counter,
    ?check_trace(
       #{timetrap => 60000},
       try
           Started = [Core, _OtherCore, Replicant] = mria_ct:start_cluster(mria, Cluster),
           mria_mnesia_test_util:wait_tables(Started),
           mria_mnesia_test_util:stabilize(1000),
           %% Any RLOG trace point may crash from now on:
           Nemesis = ?inject_crash( #{?snk_meta := #{domain := [mria, rlog|_]}}
                                  , snabbkaffe_nemesis:random_crash(0.01)
                                  ),
           ok = rpc:call(Core, mria_transaction_gen, counter, [CounterKey, 300, 100]),
           complete_test(Nemesis, Cluster, Started),
           Replicant
       after
           mria_ct:teardown_cluster(Cluster)
       end,
       fun(N3, Trace) ->
               ?assert(mria_rlog_props:replicant_bootstrap_stages(N3, Trace)),
               ?assert(mria_rlog_props:counter_import_check(CounterKey, N3, Trace) > 0),
               mria_rlog_props:no_unexpected_events(Trace)
       end).

%% This testcase verifies various modes of mria:ro_transaction
t_sum_verify(_) ->
    Env = mria_mnesia_test_util:common_env(),
    Cluster = mria_ct:cluster([core, replicant], Env),
    NTrans = 100,
    ?check_trace(
       #{timetrap => 60000},
       try
           Started = mria_ct:start_cluster(mria, Cluster),
           mria_mnesia_test_util:wait_tables(Started),
           %% Any RLOG trace point may crash from now on:
           ?inject_crash( #{?snk_meta := #{domain := [mria, rlog|_]}}
                        , snabbkaffe_nemesis:random_crash(0.1)
                        ),
           %% Kick off the verification on every node (in reverse
           %% start order) without waiting for the replies...
           lists:foreach(
             fun(Node) ->
                     rpc:async_call(Node, mria_transaction_gen, verify_trans_sum, [NTrans, 100])
             end,
             lists:reverse(Started)),
           %% ...and then wait for each node's `verify_trans_sum' event:
           [?block_until(#{?snk_kind := verify_trans_sum, node := Node})
            || Node <- Started]
       after
           mria_ct:teardown_cluster(Cluster)
       end,
       fun(Trace) ->
               ?assertMatch( [ok, ok]
                           , ?projection(result, ?of_kind(verify_trans_sum, Trace))
                           )
       end).
%% Run a counter workload in two rounds on a `mria_async' cluster; the
%% second round runs while `snabbkaffe_nemesis:recover_after(1)' is
%% injected into the `[mria, rlog, replica | _]' trace domain.  Table
%% contents are compared after each round, and the check stage passes
%% the seqnos of the "Connected to the core node" events for
%% `test_shard' to `snabbkaffe:increasing/1'.
t_rlog_replica_reconnect(_) ->
    Env = mria_mnesia_test_util:common_env(),
    Cluster = mria_ct:cluster([core, replicant], Env),
    NTrans = 200,
    CounterKey = counter_key,
    ?check_trace(
       #{timetrap => NTrans * 10 + 30000},
       try
           Started = [Core, _Replicant] = mria_ct:start_cluster(mria_async, Cluster),
           ok = mria_mnesia_test_util:wait_tables(Started),
           {atomic, _} = rpc:call(Core, mria_transaction_gen, create_data, []),
           %% consume a few transactions in the first incarnation
           ok = rpc:call(Core, mria_transaction_gen, counter, [CounterKey, NTrans - 101]),
           mria_mnesia_test_util:stabilize(1000),
           mria_mnesia_test_util:compare_table_contents(test_tab, Started),
           Nemesis = ?inject_crash( #{?snk_meta := #{domain := [mria, rlog, replica | _]}}
                                  , snabbkaffe_nemesis:recover_after(1)
                                  ),
           %% consume a few more in the second incarnation
           ok = rpc:call(Core, mria_transaction_gen, counter, [CounterKey, NTrans]),
           mria_mnesia_test_util:stabilize(5000),
           snabbkaffe_nemesis:fix_crash(Nemesis),
           mria_mnesia_test_util:wait_full_replication(Cluster),
           mria_mnesia_test_util:compare_table_contents(test_tab, Started),
           Started
       after
           mria_ct:teardown_cluster(Cluster)
       end,
       fun(Trace) ->
               snabbkaffe:increasing(
                 [SN || #{ ?snk_kind := "Connected to the core node"
                         , shard := test_shard
                         , seqno := SN
                         } <- Trace]),
               mria_rlog_props:no_unexpected_events(Trace)
       end).

%% Remove the injected errors and check table consistency
complete_test(CrashRef, Cluster, Nodes) ->
    %% Let RLOG activity settle before lifting the fault:
    mria_mnesia_test_util:stabilize(5100),
    snabbkaffe_nemesis:fix_crash(CrashRef),
    %% Only compare once the replicants have caught up:
    mria_mnesia_test_util:wait_full_replication(Cluster),
    mria_mnesia_test_util:compare_table_contents(test_tab, Nodes).
199 | -------------------------------------------------------------------------------- /test/mria_helper_tab.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | %% This module contains definitions that are used for working with the 18 | %% special marker tab that we're using for storing test metadata. 19 | -module(mria_helper_tab). 20 | 21 | -export([ init/0 22 | , wait_full_replication/1 23 | , wait_full_replication/2 24 | ]). 25 | 26 | -define(TABLE, ?MODULE). 27 | 28 | -record(?TABLE, {key, val}). 29 | 30 | -include_lib("snabbkaffe/include/snabbkaffe.hrl"). 31 | 32 | init() -> 33 | ok = mria:create_table(?TABLE, [{type, ordered_set}, 34 | {rlog_shard, test_shard}, 35 | {storage, ram_copies}, 36 | {record_name, ?TABLE}, 37 | {attributes, record_info(fields, ?TABLE)} 38 | ]). 39 | 40 | wait_full_replication(Cluster) -> 41 | wait_full_replication(Cluster, infinity). 42 | 43 | %% Emit a special transaction and wait until all replicants consume it. 
44 | wait_full_replication(Cluster, Timeout) -> 45 | %% Wait until all nodes are healthy: 46 | [rpc:call(Node, mria_rlog, wait_for_shards, [[test_shard], infinity]) 47 | || #{node := Node} <- Cluster], 48 | %% Emit a transaction and wait for replication: 49 | [CoreNode|_] = [N || #{node := N, role := core} <- Cluster], 50 | Ref = make_ref(), 51 | emit_last_transaction(CoreNode, Ref), 52 | [{ok, _} = ?block_until(#{ ?snk_kind := rlog_import_trans 53 | , ops := [{write, ?TABLE, #?TABLE{key = '$seal', val = Ref}}] 54 | , ?snk_meta := #{node := N} 55 | }, Timeout, infinity) 56 | || #{node := N, role := replicant} <- Cluster], 57 | ok. 58 | 59 | %% We use this transaction to indicate the end of the testcase. 60 | emit_last_transaction(Node, Ref) -> 61 | Fun = fun() -> 62 | mnesia:write(#?TABLE{key = '$seal', val = Ref}) 63 | end, 64 | {atomic, ok} = rpc:call(Node, mria, transaction, [test_shard, Fun]). 65 | -------------------------------------------------------------------------------- /test/mria_mnesia_SUITE_data/cluster_benchmark/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Run full cluster benchmark 3 | set -eu pipefail 4 | 5 | # Perform OS check (we need `netem' and `iptables' features to inject 6 | # faults/delays into the system, those are Linux-only): 7 | [ $(uname) = Linux ] || { 8 | echo "Sorry, this script relies on some Linux IP stack features, and only works on Linux"; 9 | exit 1; 10 | } 11 | 12 | export SCRIPT_DIR=$(dirname $0) 13 | 14 | # Start nemesis process: 15 | echo "Root permissions are needed to start nemesis process" 16 | sudo -b ${SCRIPT_DIR}/nemesis.sh 17 | 18 | # Run benchmark: 19 | rebar3 do ct --name ct@127.0.0.1 --suite mria_mnesia_SUITE --case cluster_benchmark --readable=true 20 | 21 | # Collect stats: 22 | ${SCRIPT_DIR}/latency_graph.gp 23 | -------------------------------------------------------------------------------- 
/test/mria_mnesia_SUITE_data/cluster_benchmark/latency_graph.gp: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S gnuplot -persist 2 | # GNUplot script to render plots of mria_mnesia performance 3 | 4 | set datafile separator ',' 5 | 6 | set ylabel "Transaction time (μs)" 7 | set xlabel "Cluster size" 8 | 9 | set key outside right \ 10 | title 'Network latency (μs)' 11 | 12 | plot for [col=2:*] '/tmp/mnesia_stats.csv' using 1:col with linespoints pt 4 lc col dashtype 2 t 'mnesia '.columnhead(col), \ 13 | for [col=2:*] '/tmp/mria_mnesia_stats.csv' using 1:col with linespoints pt 2 lc col t 'rlog '.columnhead(col) 14 | -------------------------------------------------------------------------------- /test/mria_mnesia_SUITE_data/cluster_benchmark/nemesis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script runs as root, receives commands from the common test 3 | # suite over FIFO, and forwards them to slowdown.sh 4 | set -uo pipefail 5 | 6 | FIFO=/tmp/nemesis 7 | 8 | if [ -p $FIFO ]; then 9 | echo "Nemesis is already running" 10 | exit 0 11 | fi 12 | 13 | trap "rm -f $FIFO" EXIT 14 | 15 | mkfifo $FIFO 16 | chmod 666 $FIFO 17 | 18 | while true; do 19 | if read line < $FIFO; then 20 | echo "Received command ${line}" 21 | $(dirname $0)/slowdown.sh -d $line -j 1 epmd 22 | fi 23 | done 24 | -------------------------------------------------------------------------------- /test/mria_mnesia_SUITE_data/cluster_benchmark/slowdown.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | [ $(uname) = Linux ] || { 5 | echo "Sorry, this script only works on Linux"; 6 | exit 1; 7 | } 8 | 9 | [ -z ${1+1} ] && { 10 | echo "Emulate network latency on the localhost. 11 | 12 | USAGE: 13 | 14 | $(basename $0) [ -d DELAY ] [ -j JITTER ] [ -r RATE ] [PORT1 PORT2 ...] 
15 | 16 | It is possible to specify PORT as 'empd' to apply delay to the 17 | distribution ports of all running BEAM VMs (excluding the CT 18 | master). 19 | 20 | Both DELAY and JITTER should be more than 0 21 | 22 | Port can be: 23 | 24 | 1. A number: it will apply netem on the messages that are sent to 25 | and from the port 26 | 27 | 2. A number with 'd' prefix (e.g. d1883): it will apply netem on 28 | the messages sent to the port 29 | 30 | 3. A number with 's' prefix (e.g. s1883): it will apply netem on 31 | the messages sent from the port 32 | 33 | 4. 'epmd': It will apply delay to all ports registered in epmd and 34 | apply netem to all erlang distribution protocol connections. 35 | 36 | EXAMPLE: 37 | 38 | $(basename $0) -d 500ms -j 1 -r 10kbps 8001 s8002 empd 39 | " 40 | exit 1; 41 | } 42 | 43 | DELAY="10ms" 44 | JITTER=1 45 | INTERFACE=lo 46 | RATE=1000Mbps 47 | 48 | while getopts "r:i:d:j:" flag; do 49 | case "$flag" in 50 | i) INTERFACE="$OPTARG";; 51 | d) DELAY="$OPTARG";; 52 | j) JITTER="$OPTARG";; 53 | r) RATE="$OPTARG";; 54 | esac 55 | done 56 | shift $((OPTIND-1)) 57 | 58 | CHAIN="OUTPUT" 59 | # Clean up: 60 | iptables -t mangle -F "$CHAIN" || true 61 | tc qdisc del dev "$INTERFACE" root || true 62 | 63 | echo "Delay=${DELAY} jitter=${JITTER} rate=${RATE} interface=${INTERFACE}" 64 | 65 | # Shape packets marked as 12 66 | MARK=12 67 | ID=$MARK 68 | tc qdisc add dev "$INTERFACE" root handle 1: htb 69 | tc class add dev "$INTERFACE" parent 1: classid 1:$ID htb rate "$RATE" 70 | # tc qdisc add dev "$INTERFACE" root netem rate "$RATE" delay "$DELAY" "$JITTER" 71 | tc qdisc add dev "$INTERFACE" parent 1:$ID handle $MARK netem delay $DELAY $JITTER distribution normal 72 | tc filter add dev "$INTERFACE" parent 1: prio 1 protocol ip handle $MARK fw flowid 1:$ID 73 | 74 | mark() { 75 | echo "Applying netem on $1 $2" 76 | iptables -A "${CHAIN}" -p tcp -t mangle -j MARK --set-mark $MARK $1 $2 77 | } 78 | 79 | # Create firewall rules to mark the packets: 80 | 
mark_port() { 81 | local PORT=$1 82 | if [[ $PORT =~ ^([0-9]+)$ ]]; then 83 | mark --sport $PORT 84 | mark --dport $PORT 85 | elif [[ $PORT =~ ^s([0-9]+)$ ]]; then 86 | PORT=${BASH_REMATCH[1]} 87 | mark --sport $PORT 88 | elif [[ $PORT =~ ^d([0-9]+)$ ]]; then 89 | PORT=${BASH_REMATCH[1]} 90 | mark --dport $PORT 91 | fi 92 | } 93 | 94 | while [ ! -z ${1+1} ]; do 95 | PORT=$1 96 | shift 97 | if [ $PORT = epmd ]; then 98 | for i in $(epmd -names | grep -v 'ct@' | awk '/at port/{print $5}'); do 99 | mark_port $i 100 | done 101 | else 102 | mark_port $PORT 103 | fi 104 | done 105 | -------------------------------------------------------------------------------- /test/mria_mnesia_test_util.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | -module(mria_mnesia_test_util). 18 | 19 | -export([stabilize/1, wait_tables/1, common_env/0, 20 | compare_table_contents/2, wait_full_replication/1, 21 | wait_full_replication/2]). 22 | 23 | -include_lib("snabbkaffe/include/snabbkaffe.hrl"). 24 | -include_lib("stdlib/include/assert.hrl"). 
25 | 26 | wait_full_replication(Cluster) -> 27 | mria_helper_tab:wait_full_replication(Cluster). 28 | 29 | wait_full_replication(Cluster, Timeout) -> 30 | mria_helper_tab:wait_full_replication(Cluster, Timeout). 31 | 32 | stabilize(Timeout) -> 33 | case ?block_until(#{?snk_meta := #{domain := [mria, rlog|_]}}, Timeout, 0) of 34 | timeout -> ok; 35 | {ok, _Evt} -> 36 | %%ct:pal("Restart waiting for cluster stabilize sue to ~p", [_Evt]), 37 | stabilize(Timeout) 38 | end. 39 | 40 | wait_tables(Nodes) -> 41 | ?tp(mria_test_util_waiting_for_tables, #{nodes => Nodes}), 42 | [?block_until(#{?snk_kind := mria_ct_cluster_join, node := Node}) 43 | || Node <- Nodes], 44 | Tables = [test_tab, test_bag, mria_helper_tab], 45 | {Rep, BadNodes} = rpc:multicall(Nodes, mria, wait_for_tables, [Tables], infinity), 46 | case lists:all(fun(A) -> A =:= ok end, Rep) andalso BadNodes =:= [] of 47 | true -> 48 | ok; 49 | false -> 50 | ?panic(failed_waiting_for_test_tables, 51 | #{ badnodes => BadNodes 52 | , replies => Rep 53 | , nodes => Nodes 54 | }) 55 | end. 56 | 57 | compare_table_contents(_, []) -> 58 | ok; 59 | compare_table_contents(Table, Nodes) -> 60 | MS = [{'_', [], ['$_']}], 61 | [{_, Reference}|Rest] = [{Node, lists:sort(rpc:call(Node, mnesia, dirty_select, [Table, MS]))} 62 | || Node <- Nodes], 63 | lists:foreach( 64 | fun({Node, Contents}) -> 65 | ?assertEqual({Node, Reference}, {Node, Contents}) 66 | end, 67 | Rest). 68 | 69 | common_env() -> 70 | [ {mria, db_backend, rlog} 71 | , {mria, rlog_startup_shards, [test_shard]} 72 | , {mria, strict_mode, true} 73 | , {mria, rpc_module, gen_rpc} 74 | , {mria, lb_poll_interval, 100} 75 | ]. 76 | -------------------------------------------------------------------------------- /test/mria_node_monitor_SUITE.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2019-2023 EMQ Technologies Co., Ltd. 
All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | -module(mria_node_monitor_SUITE). 18 | 19 | -compile(export_all). 20 | -compile(nowarn_export_all). 21 | 22 | -include_lib("eunit/include/eunit.hrl"). 23 | 24 | all() -> 25 | mria_ct:all(?MODULE). 26 | 27 | init_per_suite(Config) -> 28 | mria:start(), 29 | mria_ct:start_dist(), 30 | Config. 31 | 32 | end_per_suite(_Config) -> 33 | ok = mria:stop(). 34 | 35 | t_cast_heartbeat(_) -> 36 | ok = mria_node_monitor:cast(node(), heartbeat). 37 | 38 | t_cast_suspect(_) -> 39 | ok = mria_node_monitor:cast(node(), {suspect, 'n1@127.0.0.1', 'n2@127.0.0.1'}). 40 | 41 | t_cast_confirm(_) -> 42 | ok = mria_node_monitor:cast(node(), {confirm, 'n1@127.0.0.1', down}). 43 | 44 | t_cast_report_partition(_) -> 45 | ok = mria_node_monitor:cast(node, {report_partition, 'n1@127.0.0.1'}). 46 | 47 | t_cast_heal_partition(_) -> 48 | ok = mria_node_monitor:cast(node, {heal_partition, ['n1@127.0.0.1']}). 49 | 50 | t_handle_nodeup_info(_) -> 51 | mria_node_monitor ! {nodeup, 'n1@127.0.0.1', []}. 52 | 53 | t_handle_nodedown_info(_) -> 54 | mria_node_monitor ! {nodedown, 'n1@127.0.0.1', []}. 55 | 56 | t_run_after(_) -> 57 | TRef = mria_node_monitor:run_after(100, heartbeat), 58 | ?assert(is_reference(TRef)). 59 | 60 | t_partitions(_) -> 61 | [] = mria_node_monitor:partitions(). 
62 | 63 | t_handle_unexpected(_) -> 64 | {reply, ignore, state} = mria_node_monitor:handle_call(req, from, state), 65 | {noreply, state} = mria_node_monitor:handle_cast(msg, state), 66 | {noreply, state} = mria_node_monitor:handle_info(info, state). 67 | -------------------------------------------------------------------------------- /test/mria_proper_mixed_cluster_suite.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | %% Test database consistency with random transactions 18 | -module(mria_proper_mixed_cluster_suite). 19 | 20 | -compile(export_all). 21 | -compile(nowarn_export_all). 22 | 23 | -include_lib("snabbkaffe/include/ct_boilerplate.hrl"). 24 | -include("mria_proper_utils.hrl"). 
%%================================================================================
%% Testcases
%%================================================================================

%% Run the shared transaction-import property on a three-node cluster
%% in which the second core is configured with `db_backend = mnesia'.
t_import_transactions_mixed_cluster(Config0) when is_list(Config0) ->
    ProperOpts = #{ max_size => 300
                  , numtests => 100
                  , timeout  => 100000
                  },
    Config = [{proper, ProperOpts} | Config0],
    ClusterConfig = [ core
                    , {core, [{mria, db_backend, mnesia}]}
                    , replicant
                    ],
    ?run_prop(Config, mria_proper_utils:prop(ClusterConfig, ?MODULE)).

%%================================================================================
%% Proper FSM definition
%%================================================================================

%% Initial model value at system start. Should be deterministic.
initial_state() ->
    #s{cores = [n1, n2], replicants = [n3]}.

command(State) ->
    mria_proper_utils:command(State).

%% With more than one core, a race condition involving a
%% `dirty_write' / `dirty_delete' pair of ops happening on
%% different cores can arise: one of the cores might process the
%% dirty ops in a different order than what the state machine
%% expects, thus violating the model consistency.  Since this is
%% inherent to mnesia, for this test we simply forbid dirty
%% operations altogether.
precondition(_State, {call, _Mod, execute, [_Node, {dirty, _}]}) ->
    false;
precondition(_State, {call, _Mod, execute, [_Node, _Op]}) ->
    true;
precondition(State, Call) ->
    mria_proper_utils:precondition(State, Call).

postcondition(State, Call, Res) ->
    mria_proper_utils:postcondition(State, Call, Res).

next_state(State, Res, Call) ->
    mria_proper_utils:next_state(State, Res, Call).

init_per_suite(Config) ->
    mria_ct:start_dist(),
    Config.

end_per_suite(_Config) ->
    ok.
72 | -------------------------------------------------------------------------------- /test/mria_proper_suite.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | %% Test database consistency with random transactions 18 | -module(mria_proper_suite). 19 | 20 | -compile(export_all). 21 | -compile(nowarn_export_all). 22 | 23 | -include_lib("snabbkaffe/include/ct_boilerplate.hrl"). 24 | -include("mria_proper_utils.hrl"). 25 | 26 | %%================================================================================ 27 | %% Testcases 28 | %%================================================================================ 29 | 30 | t_import_transactions(Config0) when is_list(Config0) -> 31 | Config = [{proper, #{max_size => 300, 32 | numtests => 100, 33 | timeout => 100000 34 | }} | Config0], 35 | ClusterConfig = [core, replicant], 36 | ?run_prop(Config, mria_proper_utils:prop(ClusterConfig, ?MODULE)). 
37 | 38 | %%================================================================================ 39 | %% Proper FSM definition 40 | %%================================================================================ 41 | 42 | %% Initial model value at system start. Should be deterministic. 43 | initial_state() -> 44 | #s{cores = [n1], replicants = [n2]}. 45 | 46 | command(State) -> mria_proper_utils:command(State). 47 | precondition(State, Op) -> mria_proper_utils:precondition(State, Op). 48 | postcondition(State, Op, Res) -> mria_proper_utils:postcondition(State, Op, Res). 49 | next_state(State, Res, Op) -> mria_proper_utils:next_state(State, Res, Op). 50 | 51 | init_per_suite(Config) -> 52 | mria_ct:start_dist(), 53 | snabbkaffe:fix_ct_logging(), 54 | Config. 55 | 56 | end_per_suite(_Config) -> 57 | ok. 58 | -------------------------------------------------------------------------------- /test/mria_proper_utils.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | %% Generators and helper functions for property tests 18 | -module(mria_proper_utils). 19 | 20 | -compile(export_all). 
-compile(nowarn_export_all).

-include_lib("snabbkaffe/include/ct_boilerplate.hrl").
-include("mria_proper_utils.hrl").

%%================================================================================
%% Properties
%%================================================================================

%% Property shared by the PropEr suites: start a cluster described by
%% `ClusterConfig', run a generated command sequence from `PropModule',
%% wait for full replication and compare every node with the model.
prop(ClusterConfig, PropModule) ->
    Env = mria_mnesia_test_util:common_env(),
    Cluster = mria_ct:cluster(ClusterConfig, Env),
    snabbkaffe:fix_ct_logging(),
    ?forall_trace(
       Cmds, commands(PropModule),
       #{timetrap => 20000},
       try
           Started = mria_ct:start_cluster(mria, Cluster),
           ok = mria_mnesia_test_util:wait_tables(Started),
           Outcome = {_History, Model, _Result} = run_commands(PropModule, Cmds),
           mria_mnesia_test_util:wait_full_replication(Cluster),
           lists:foreach(
             fun(Node) -> check_state(Cmds, Model, Node) end,
             Started),
           Outcome
       after
           %% Best effort: a failing teardown must not mask the outcome.
           catch mria_ct:teardown_cluster(Cluster)
       end,
       fun({_History, _State, Result}, _Trace) ->
               ?assertMatch(ok, Result),
               true
       end).

%%================================================================================
%% Proper generators
%%================================================================================

%% Keys are drawn from 1..100.
table_key() -> range(1, 100).

value() -> non_neg_integer().

%% One of the two test tables.
table() -> union([test_tab, test_bag]).

%% Symbolic write of a generated key/value pair into `Table'.
write_op(Table) -> {write, Table, table_key(), value()}.
%% Generate a single operation against a randomly chosen table, taking
%% the current model into account so that deletes only target keys or
%% objects that the model contains.
trans_op(#s{bag = Bag, set = Set}) ->
    ?LET(Table, table(),
         case Table of
             test_tab -> set_op(Table, maps:keys(Set));
             test_bag -> bag_op(Table, Bag)
         end).

%% Operation on the set table; only writes while the model has no keys.
set_op(Table, []) ->
    write_op(Table);
set_op(Table, Keys) ->
    frequency([ {60, write_op(Table)}
              , {20, {delete, Table, oneof(Keys)}}
              ]).

%% Operation on the bag table; only writes while the model is empty.
bag_op(Table, []) ->
    write_op(Table);
bag_op(Table, Objs) ->
    Keys = proplists:get_keys(Objs),
    frequency([ {60, write_op(Table)}
              , {10, {delete, Table, oneof(Keys)}}
              , {30, {delete_object, Table, oneof(Objs)}}
              ]).

%% A command payload: a short transaction, a single dirty operation, or
%% (less often) a table clear.
transaction(State) ->
    InTransaction = {transaction, resize(10, list(trans_op(State)))},
    Dirty = {dirty, trans_op(State)},
    Clear = {clear_table, table()},
    frequency([ {50, InTransaction}
              , {50, Dirty}
              , {5, Clear}
              ]).

%%================================================================================
%% Proper FSM definition (common)
%%================================================================================

command(State) ->
    Execute = {call, ?MODULE, execute, [participant(State), transaction(State)]},
    Restart = {call, ?MODULE, restart_mria, [participant(State)]},
    frequency([ {90, Execute}
              , {0, Restart} %% TODO
              ]).

%% Picks whether a command should be valid under the current state.
precondition(_State, {call, _Mod, _Fun, _Args}) ->
    true.

postcondition(_State, {call, _Mod, _Fun, _Args}, _Res) ->
    true.

%% Advance the model for an `execute' call; any other call leaves the
%% model untouched.
next_state(Model, _Res, {call, ?MODULE, execute, [_, Payload]}) ->
    case Payload of
        {clear_table, test_tab} ->
            Model#s{set = #{}};
        {clear_table, test_bag} ->
            Model#s{bag = []};
        {transaction, Ops} ->
            lists:foldl(fun symbolic_exec_op/2, Model, Ops);
        {dirty, Op} ->
            symbolic_exec_op(Op, Model)
    end;
next_state(Model, _Res, _Call) ->
    Model.
%% Compare the model (`#s{}') with the actual contents of both test
%% tables as read from `Node'.
check_state(Cmds, #s{bag = Bag, set = Set}, Node) ->
    ExpectedBag = lists:sort(Bag),
    compare_lists(bag, Node, Cmds, ExpectedBag, get_records(Node, test_bag)),
    ExpectedSet = lists:sort(maps:to_list(Set)),
    compare_lists(set, Node, Cmds, ExpectedSet, get_records(Node, test_tab)).

%% Assert that `Expected' and `Got' hold the same elements; the
%% difference in both directions is attached to the assertion comment.
compare_lists(Type, Node, Cmds, Expected, Got) ->
    Missing = Expected -- Got,
    Unexpected = Got -- Expected,
    Comment = [ {node, Node}
              , {cmds, Cmds}
              , {unexpected, Unexpected}
              , {missing, Missing}
              , {table_type, Type}
              ],
    ?assert(length(Missing) + length(Unexpected) =:= 0, Comment).

%%================================================================================
%% Internal functions
%%================================================================================

%% Apply one symbolic operation to the model.
symbolic_exec_op({write, test_tab, Key, Val}, State = #s{set = Old}) ->
    State#s{set = Old#{Key => Val}};
symbolic_exec_op({write, test_bag, Key, Val}, State = #s{bag = Old}) ->
    %% Writing an object that is already present must not duplicate it.
    Rec = {Key, Val},
    State#s{bag = [Rec | Old -- [Rec]]};
symbolic_exec_op({delete, test_tab, Key}, State = #s{set = Old}) ->
    State#s{set = maps:remove(Key, Old)};
symbolic_exec_op({delete, test_bag, Key}, State = #s{bag = Old}) ->
    %% Deleting by key drops every object stored under that key.
    State#s{bag = proplists:delete(Key, Old)};
symbolic_exec_op({delete_object, test_bag, Rec}, State = #s{bag = Old}) ->
    State#s{bag = lists:delete(Rec, Old)}.

%% Execute one operation through the `mnesia' API; each call is
%% asserted to return `ok'.
execute_op({write, Tab, Key, Val}) ->
    ok = mnesia:write({Tab, Key, Val});
execute_op({delete, Tab, Key}) ->
    ok = mnesia:delete({Tab, Key});
execute_op({delete_object, Tab, {K, V}}) ->
    ok = mnesia:delete_object({Tab, K, V}).
170 | 171 | execute_op_dirty({write, Tab, Key, Val}) -> 172 | ok = mria:dirty_write({Tab, Key, Val}); 173 | execute_op_dirty({delete, Tab, Key}) -> 174 | ok = mria:dirty_delete({Tab, Key}); 175 | execute_op_dirty({delete_object, Tab, {K, V}}) -> 176 | ok = mria:dirty_delete_object({Tab, K, V}). 177 | 178 | execute(Node, {clear_table, Tab}) -> 179 | {atomic, ok} = rpc:call(Node, mria, clear_table, [Tab]); 180 | execute(Node, {transaction, Ops}) -> 181 | Fun = fun() -> 182 | lists:foreach(fun execute_op/1, Ops) 183 | end, 184 | {atomic, ok} = rpc:call(Node, mria, transaction, [test_shard, Fun]); 185 | execute(Node, {dirty, Op}) -> 186 | ok = rpc:call(Node, ?MODULE, execute_op_dirty, [Op]). 187 | 188 | restart_mria(Node) -> 189 | rpc:call(Node, application, stop, [mria]), 190 | {ok, _} = rpc:call(Node, application, ensure_all_started, [mria]). 191 | 192 | participant(#s{cores = Cores, replicants = Replicants}) -> 193 | cluster_node(Cores ++ Replicants). 194 | 195 | cluster_node(Names) -> 196 | oneof([mria_ct:node_id(Name) || Name <- Names]). 197 | 198 | get_records(Node, Table) -> 199 | {atomic, Records} = 200 | rpc:call(Node, mria, transaction, 201 | [ test_shard 202 | , fun() -> 203 | mnesia:foldr(fun(Record, Acc) -> [Record | Acc] end, [], Table) 204 | end] 205 | ), 206 | lists:sort([{K, V} || {_, K, V} <- Records]). 207 | -------------------------------------------------------------------------------- /test/mria_proper_utils.hrl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2021-2022 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 
6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %% See the License for the specific language governing permissions and 14 | %% limitations under the License. 15 | %%-------------------------------------------------------------------- 16 | 17 | %%================================================================================ 18 | %% Types 19 | %%================================================================================ 20 | 21 | -ifndef(MRIA_PROPER_UTILS_HRL). 22 | -define(MRIA_PROPER_UTILS_HRL, true). 23 | 24 | -type key() :: non_neg_integer(). 25 | -type value() :: non_neg_integer(). 26 | 27 | -record(s, 28 | { bag = [] :: [{key, value()}] 29 | , set = #{} :: #{key() => value()} 30 | , cores = [] :: [atom()] 31 | , replicants = [] :: [atom()] 32 | }). 33 | 34 | -endif. 35 | -------------------------------------------------------------------------------- /test/mria_rlog_props.erl: -------------------------------------------------------------------------------- 1 | %%-------------------------------------------------------------------- 2 | %% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 3 | %% 4 | %% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %% you may not use this file except in compliance with the License. 6 | %% You may obtain a copy of the License at 7 | %% 8 | %% http://www.apache.org/licenses/LICENSE-2.0 9 | %% 10 | %% Unless required by applicable law or agreed to in writing, software 11 | %% distributed under the License is distributed on an "AS IS" BASIS, 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

-module(mria_rlog_props).

-compile(nowarn_underscore_match).

-export([ no_unexpected_events/1
        , replicant_no_restarts/1
        , no_split_brain/1
        , replicant_bootstrap_stages/2
        , all_intercepted_commit_logs_received/1
        , all_batches_received/1
        , counter_import_check/3
        , no_tlog_gaps/1
        , graceful_stop/1
        ]).

-include_lib("snabbkaffe/include/test_macros.hrl").
-include_lib("stdlib/include/assert.hrl").
-include_lib("proper/include/proper.hrl").
-include_lib("eunit/include/eunit.hrl").
-include("mria_rlog.hrl").

%%================================================================================
%% Checks
%%================================================================================

%% Verify graceful shutdown of the worker processes: the `process'
%% field of the `mria_worker_terminate' events in the trace must cover
%% every worker module listed below (i.e. each one ran its terminate
%% callback).
graceful_stop(Trace) ->
    TerminateEvents = ?of_kind(mria_worker_terminate, Trace),
    ExpectedWorkers = [ mria_lb
                      , mria_bootstrapper
                      , mria_rlog_server
                      , mria_rlog_replica
                      , mria_rlog_agent
                      , mria_replica_importer_worker
                      , mria_membership
                      , mria_status
                      ],
    ?projection_complete(process, TerminateEvents, ExpectedWorkers).

%% Verify that no event of kind `?unexpected_event_kind' was emitted
%% before the cluster teardown started.
no_unexpected_events(FullTrace) ->
    %% Events emitted after `teardown_cluster' are not interesting:
    {BeforeTeardown, _AfterTeardown} =
        ?split_trace_at(#{?snk_kind := teardown_cluster}, FullTrace),
    Unexpected = ?of_kind(?unexpected_event_kind, BeforeTeardown),
    ?assertMatch([], Unexpected),
    true.
%% Verify that no replicant was restarted before the cluster teardown:
%% every `{node, shard}' pair may appear in at most one
%% `rlog_replica_start' event.
replicant_no_restarts(FullTrace) ->
    %% Events emitted after `teardown_cluster' are not interesting:
    {BeforeTeardown, _AfterTeardown} =
        ?split_trace_at(#{?snk_kind := teardown_cluster}, FullTrace),
    ReplicaStarts = ?of_kind(rlog_replica_start, BeforeTeardown),
    StartEvents = ?projection([node, shard], ReplicaStarts),
    %% Any duplicate would make the deduplicated list shorter:
    ?assertEqual(length(StartEvents), length(lists:usort(StartEvents))),
    true.

%% Verify that no `mria_lb_spit_brain' event was emitted before the
%% cluster teardown.
no_split_brain(FullTrace) ->
    {BeforeTeardown, _AfterTeardown} =
        ?split_trace_at(#{?snk_kind := teardown_cluster}, FullTrace),
    SplitBrainEvents = ?of_kind(mria_lb_spit_brain, BeforeTeardown),
    ?assertMatch([], SplitBrainEvents),
    true.

%% Verify that the replicant FSM on `Node' walks through its states in
%% the expected order. For every process (`_Pid' ties cause and effect
%% to the same pid) each of the following transitions must be followed
%% by the next one:
%%
%%   disconnected -> bootstrap -> local_replay -> normal
%%
%% The result is the value of the last `?causality' check.
replicant_bootstrap_stages(Node, FullTrace) ->
    ReplicaEvents = ?of_domain([mria, rlog, replica|_], FullTrace),
    NodeTrace = ?of_node(Node, ReplicaEvents),
    ?causality( #{?snk_kind := state_change, to := disconnected, ?snk_meta := #{pid := _Pid}}
              , #{?snk_kind := state_change, to := bootstrap,    ?snk_meta := #{pid := _Pid}}
              , NodeTrace
              ),
    ?causality( #{?snk_kind := state_change, to := bootstrap,    ?snk_meta := #{pid := _Pid}}
              , #{?snk_kind := state_change, to := local_replay, ?snk_meta := #{pid := _Pid}}
              , NodeTrace
              ),
    ?causality( #{?snk_kind := state_change, to := local_replay, ?snk_meta := #{pid := _Pid}}
              , #{?snk_kind := state_change, to := normal,       ?snk_meta := #{pid := _Pid}}
              , NodeTrace
              ).
%% Verify that every commit log intercepted on a core node is stored
%% by the replicant connected to that core.
all_intercepted_commit_logs_received(Trace0) ->
    %% Map each replicant (downstream) node to the core (upstream) node
    %% it connected to. When a replicant shows up in several
    %% "Connected to the core node" events, the latest one wins:
    ReplicantAgentNodePairs =
        maps:from_list(
          [ {DownstreamNode, UpstreamNode}
            || #{ ?snk_kind := "Connected to the core node"
                , ?snk_meta := #{node := DownstreamNode}
                , node      := UpstreamNode
                } <- Trace0]),
    ct:pal("replicant and agent node pairs: ~p~n", [ReplicantAgentNodePairs]),
    %% Schema operations, the meta shard and `mria_schema' writes are
    %% excluded from the comparison:
    IsRelevant =
        fun(#{schema_ops := [_ | _]})                         -> false;
           (#{?snk_meta := #{shard := ?mria_meta_shard}})     -> false;
           (#{ram_copies := [{{mria_schema, _}, _, _} | _]})  -> false;
           (_)                                                -> true
        end,
    Trace = [ Event
              || Event = #{?snk_kind := Kind} <- Trace0,
                 Kind =:= mria_rlog_intercept_trans orelse
                     Kind =:= rlog_replica_store_trans,
                 IsRelevant(Event)],
    %% Interception on the upstream node and storing on the downstream
    %% node are matched by transaction id (`_Tid'):
    lists:foreach(
      fun({DownstreamNode, UpstreamNode}) ->
              ?assert(
                 ?strict_causality(
                    #{ ?snk_kind := mria_rlog_intercept_trans
                     , ?snk_meta := #{node := UpstreamNode}
                     , tid       := _Tid
                     }
                   , #{ ?snk_kind := rlog_replica_store_trans
                      , ?snk_meta := #{node := DownstreamNode}
                      , tid       := _Tid
                      }
                   , Trace
                   ))
      end,
      maps:to_list(ReplicantAgentNodePairs)),
    ok.

%% Verify that each batch sent by an agent (`rlog_realtime_op') was
%% either imported or stored by the replicant; events are paired by
%% agent and sequence number.
all_batches_received(Trace0) ->
    RlogTrace = ?of_domain([mria, rlog|_], Trace0),
    ?strict_causality(
        #{?snk_kind := rlog_realtime_op, agent := _A, seqno := _S}
      , #{?snk_kind := K, agent := _A, seqno := _S} when K =:= rlog_replica_import_trans;
                                                         K =:= rlog_replica_store_trans
      , RlogTrace).

%% Check that transactions are imported in an order that guarantees
%% that the end result is consistent.
-spec counter_import_check(term(), node(), snabbkaffe:trace()) -> integer().
counter_import_check(CounterKey, Node, Trace0) ->
    NodeTrace = ?of_node(Node, Trace0),
    %% A shard bootstrap resets the import sequence, so the trace is cut
    %% at every `shard_bootstrap_complete' event and each piece is
    %% validated on its own (the bootstrap itself is assumed correct):
    Sequences = ?splitr_trace(#{?snk_kind := shard_bootstrap_complete}, NodeTrace),
    %% Validate every piece and add up the number of import operations:
    lists:sum([do_counter_import_check(CounterKey, Sequence) || Sequence <- Sequences]).

%% Validate a sequence of numbers. Each step may increase the value by
%% at most 1; the sequence may also restart from an earlier point. If a
%% restart happened, the last element of the sequence must not be
%% smaller than any element seen before it.
%%
%% Returns `true' for a valid sequence and raises otherwise.
check_transaction_replay_sequence([]) ->
    true;
check_transaction_replay_sequence([Head | Tail]) ->
    check_transaction_replay_sequence(Head, Head, Tail).

%% Verify that the trace contains no `gap_in_the_tlog' events.
no_tlog_gaps(Trace) ->
    Gaps = ?of_kind(gap_in_the_tlog, Trace),
    ?assertEqual([], Gaps),
    true.

%%================================================================================
%% Internal functions
%%================================================================================

%% Collect the values written to `CounterKey' by single-write
%% `rlog_import_trans' events, validate their order and return how many
%% such writes were found.
do_counter_import_check(CounterKey, Trace) ->
    Writes = [ Value
               || #{ ?snk_kind := rlog_import_trans
                   , ops       := [{write, test_tab, {test_tab, Key, Value}}]
                   } <- Trace,
                  Key =:= CounterKey],
    ?assert(check_transaction_replay_sequence(Writes)),
    length(Writes).
%% Worker for `check_transaction_replay_sequence/1'.
%%
%%   Max  - largest element seen before the current one
%%   Prev - element processed last
%%
%% A step is accepted when it goes up by exactly 1 or when it does not
%% go up at all (a replay from an earlier point). Any other step is a
%% gap. At the end of the input the last element must have caught up
%% with the maximum.
check_transaction_replay_sequence(Max, LastElem, []) ->
    case LastElem >= Max of
        true ->
            ok;
        false ->
            ?panic("invalid sequence restart",
                   #{ maximum   => Max
                    , last_elem => LastElem
                    })
    end,
    true;
check_transaction_replay_sequence(Max, Prev, [Next | Rest])
  when Next =:= Prev + 1;
       Next =< Prev ->
    check_transaction_replay_sequence(max(Max, Prev), Next, Rest);
check_transaction_replay_sequence(Max, Prev, [Next | _]) ->
    ?panic("gap in the sequence",
           #{ maximum   => Max
            , elem      => Prev
            , next_elem => Next
            }).

%%================================================================================
%% Unit tests
%%================================================================================

%% Find all node/shard pairs for the replicants:
%% -spec find_replicant_shards(snabbkaffe:trace()) -> {node(), mria_rlog:shard()}.
%% find_replicant_shards(Trace) ->
%%     lists:usort([{Node, Shard} || #{ ?snk_kind := "starting_rlog_shard"
%%                                    , shard := Shard
%%                                    , ?snk_meta := #{node := Node, domain := [mria, rlog, replica]}
%%                                    } <- Trace]).
%% EUnit test for `check_transaction_replay_sequence/1': valid
%% sequences return `true', invalid ones raise an error.
check_transaction_replay_sequence_test() ->
    ?assert(check_transaction_replay_sequence([])),
    ?assert(check_transaction_replay_sequence([1, 2])),
    ?assert(check_transaction_replay_sequence([2, 3, 4])),
    %% Gap:
    ?assertError(_, check_transaction_replay_sequence([0, 1, 3])),
    ?assertError(_, check_transaction_replay_sequence([0, 1, 13, 14])),
    %% Replays:
    ?assert(check_transaction_replay_sequence([1, 1, 2, 3, 3])),
    ?assert(check_transaction_replay_sequence([1, 2, 3, 1, 2, 3, 4])),
    ?assert(check_transaction_replay_sequence([1, 2, 3, 1, 2, 3, 4, 3, 4])),
    %% Invalid replays (the sequence ends below the maximum seen, or
    %% jumps ahead after a restart):
    ?assertError(_, check_transaction_replay_sequence([1, 2, 3, 2])),
    ?assertError(_, check_transaction_replay_sequence([1, 2, 3, 2, 4])),
    ?assertError(_, check_transaction_replay_sequence([1, 2, 3, 2, 3, 4, 5, 3, 4])).
--------------------------------------------------------------------------------
/test/mria_rlog_tests.erl:
--------------------------------------------------------------------------------
-module(mria_rlog_tests).

-include_lib("proper/include/proper.hrl").
-include_lib("eunit/include/eunit.hrl").

%% Property: `mria_lib:shuffle/1' returns a permutation of its input,
%% i.e. sorting the shuffled list gives the same result as sorting the
%% original.
%%
%% The property has to be run explicitly: a bare `?FORALL' only builds
%% a PropEr test term, and an EUnit test function that merely returns
%% it passes without checking anything. The property body must yield a
%% boolean, hence `=:=' rather than an EUnit assertion. Integer lists
%% are used so that the sorted order is unique (in a list of arbitrary
%% terms, `1' and `1.0' compare equal and a stable sort would keep them
%% in input order).
shuffle_test() ->
    Prop = ?FORALL(L, list(integer()),
                   lists:sort(L) =:= lists:sort(mria_lib:shuffle(L))),
    ?assert(proper:quickcheck(Prop, [{to_file, user}])).
--------------------------------------------------------------------------------
/test/mria_transaction_gen.erl:
--------------------------------------------------------------------------------
%%--------------------------------------------------------------------
%% Copyright (c) 2021-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(mria_transaction_gen).

-include_lib("stdlib/include/assert.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").

-export([ init/0
        , create_data/0
        , delete/1
        , abort/2
        , benchmark/3
        , start_async_counter/3
        , counter/2
        , counter/3
        , ro_read_all_keys/0

        , verify_trans_sum/2
        ]).

-record(test_tab, {key, val}).

-record(test_bag, {key, val}).

%% Create the test tables and wait for their shard.
%%
%% Initializes the helper table, creates `test_tab' (ordered_set) and
%% `test_bag' (bag) as ram_copies tables of `test_shard', then blocks
%% in `mria_rlog:wait_for_shards/2' with an infinite timeout and
%% returns its result.
init() ->
    ok = mria_helper_tab:init(),
    TableOpts = fun(Type, RecordName, Fields) ->
                        [ {type, Type}
                        , {rlog_shard, test_shard}
                        , {storage, ram_copies}
                        , {record_name, RecordName}
                        , {attributes, Fields}
                        ]
                end,
    ok = mria:create_table(test_tab,
                           TableOpts(ordered_set, test_tab, record_info(fields, test_tab))),
    ok = mria:create_table(test_bag,
                           TableOpts(bag, test_bag, record_info(fields, test_bag))),
    mria_rlog:wait_for_shards([test_shard], infinity).

%% Entry point of the sum-consistency check: wait up to 10 seconds for
%% `test_tab' (the outcome of the wait is not inspected), generate the
%% initial data set and run `Rounds' verification rounds, each holding
%% its read-only transaction open for `Delay' milliseconds.
verify_trans_sum(Rounds, Delay) ->
    mnesia:wait_for_tables([test_tab], 10000),
    do_trans_gen(),
    verify_trans_sum_loop(Rounds, Delay).

%% Write records with keys 0..4 and value 0 to `test_tab' in a single
%% `test_shard' transaction. Returns the result of the transaction.
create_data() ->
    ok = mria:wait_for_tables([test_tab]),
    WriteZero = fun(Key) -> mnesia:write(#test_tab{key = Key, val = 0}) end,
    mria:transaction(
      test_shard,
      fun() ->
              lists:map(WriteZero, lists:seq(0, 4))
      end).
%% Read every record of `test_tab' inside a read-only `test_shard'
%% transaction. The transaction yields one `mria_ct:read/2' result per
%% key.
ro_read_all_keys() ->
    ReadAll = fun() ->
                      lists:map(fun(Key) -> mria_ct:read(test_tab, Key) end,
                                mnesia:all_keys(test_tab))
              end,
    mria:ro_transaction(test_shard, ReadAll).

%% Delete key `K' from `test_tab' in a `test_shard' transaction.
delete(K) ->
    DeleteKey = fun() -> mnesia:delete({test_tab, K}) end,
    mria:transaction(test_shard, DeleteKey).

%% Start `counter/2' for `Key' with `N' iterations in a new process on
%% `Node', without waiting for it (`rpc:cast/4').
start_async_counter(Node, Key, N) ->
    SpawnArgs = [?MODULE, counter, [Key, N]],
    rpc:cast(Node, proc_lib, spawn, SpawnArgs).

%% Increment the counter stored under `Key' `N' times with no delay
%% between the iterations. The process traps exits so that `counter/3'
%% can turn an incoming 'EXIT' message into an error.
counter(Key, N) ->
    process_flag(trap_exit, true),
    counter(Key, N, 0).

%% Increment the counter stored under `Key' `NIter' times, sleeping
%% `Delay' milliseconds after every update.
%%
%% Each iteration reads the current value (a missing record counts as
%% 0) and writes the value plus one within a single `test_shard'
%% transaction. Trace points are emitted before and after the update;
%% the second one carries the value read, i.e. the value before the
%% increment. An 'EXIT' message found in the mailbox after the sleep
%% aborts the loop with `error({exit, From, Reason})'.
counter(_Key, 0, _) ->
    ok;
counter(Key, NIter, Delay) ->
    ?tp(info, trans_gen_counter_update_start,
        #{ key => Key
         }),
    Increment =
        fun() ->
                Current = case mria_ct:read(test_tab, Key) of
                              []                       -> 0;
                              [#test_tab{val = Value}] -> Value
                          end,
                ok = mria_ct:write(#test_tab{key = Key, val = Current + 1}),
                Current
        end,
    {atomic, Val} = mria:transaction(test_shard, Increment),
    ?tp(info, trans_gen_counter_update,
        #{ key   => Key
         , value => Val
         }),
    timer:sleep(Delay),
    receive
        {'EXIT', From, Reason} ->
            error({exit, From, Reason})
    after 0 ->
            counter(Key, NIter - 1, Delay)
    end.

%% Test that behavior of mria_mnesia is the same when transaction aborts:
%% write a canary record, then abort in the way selected by `AbortKind'
%% (see `do_abort/1'). `Backend' selects plain `mnesia:transaction/1'
%% or `mria:transaction/2' on `test_shard'.
abort(Backend, AbortKind) ->
    WriteThenAbort =
        fun() ->
                mnesia:write(#test_tab{key = canary_key, val = canary_dead}),
                do_abort(AbortKind)
        end,
    case Backend of
        mnesia ->
            mnesia:transaction(WriteThenAbort);
        mria_mnesia ->
            mria:transaction(test_shard, WriteThenAbort)
    end.

%% Leave the current transaction in one of four ways, always with the
%% reason `deliberate'.
do_abort(abort) -> mnesia:abort(deliberate);
do_abort(error) -> error(deliberate);
do_abort(exit)  -> exit(deliberate);
do_abort(throw) -> throw(deliberate).
%% Measure the average transaction time for every network delay in
%% `delays'.
%%
%% For each delay the network delay is set via `mria_ct' and
%% `do_benchmark/2' is run with `trans_size' keys per transaction for
%% `max_time' milliseconds. Every measurement is pushed to snabbkaffe
%% as a stat keyed by `{Backend, Delay}', and one CSV row
%% (`NNodes' followed by the measurements) is appended to `ResultFile'.
benchmark(ResultFile,
          #{ delays     := Delays
           , trans_size := NKeys
           , max_time   := MaxTime
           }, NNodes) ->
    Measure = fun(Delay) ->
                      mria_ct:set_network_delay(Delay),
                      do_benchmark(NKeys, MaxTime)
              end,
    TransTimes = lists:map(Measure, Delays),
    Backend = mria_rlog:backend(),
    lists:foreach(
      fun({Delay, T}) ->
              snabbkaffe:push_stat({Backend, Delay}, NNodes, T)
      end,
      lists:zip(Delays, TransTimes)),
    CsvRow = mria_ct:vals_to_csv([NNodes | TransTimes]),
    ok = file:write_file(ResultFile, CsvRow, [append]).

%% Run transactions until the `complete' message scheduled `MaxTime'
%% milliseconds ahead arrives; return the average time per transaction
%% in microseconds (`timer:tc/1' reports microseconds).
do_benchmark(NKeys, MaxTime) ->
    Run = fun() ->
                  timer:send_after(MaxTime, complete),
                  loop(0, NKeys)
          end,
    {ElapsedUs, NTrans} = timer:tc(Run),
    ElapsedUs / NTrans.

%% Benchmark loop: until `complete' is found in the mailbox, run a
%% transaction that reads and rewrites keys 1..NKeys with the current
%% iteration number. Returns the number of completed transactions.
loop(Cnt, NKeys) ->
    receive
        complete ->
            Cnt
    after 0 ->
            ReadWrite = fun(Key) ->
                                mnesia:read({test_tab, Key}),
                                mnesia:write(#test_tab{key = Key, val = Cnt})
                        end,
            {atomic, _} = mria:transaction(
                            test_shard,
                            fun() ->
                                    lists:map(ReadWrite, lists:seq(1, NKeys))
                            end),
            loop(Cnt + 1, NKeys)
    end.

%% Run `N' verification rounds. Rounds with an even counter first
%% regenerate the data set; every round then runs the read-only check.
%% Emits `verify_trans_sum' with `result => ok' when all rounds passed,
%% or with `result => nok' and the offending result as soon as a check
%% returns anything but `{atomic, true}'.
verify_trans_sum_loop(0, _Delay) ->
    ?tp(verify_trans_sum,
        #{ result => ok
         , node   => node()
         });
verify_trans_sum_loop(N, Delay) ->
    ?tp(verify_trans_step, #{n => N}),
    %% Perform write transaction on even rounds only:
    case N rem 2 of
        0 -> do_trans_gen();
        _ -> ok
    end,
    %% Perform r/o transaction:
    case do_trans_verify(Delay) of
        {atomic, true} ->
            verify_trans_sum_loop(N - 1, Delay);
        Result ->
            ?tp(verify_trans_sum,
                #{ result => nok
                 , reason => Result
                 , node   => node()
                 })
    end.

%% In one `test_shard' transaction: store a fresh random value under
%% each of the keys 1..10, then store their sum under the key `sum'.
do_trans_gen() ->
    WriteRandom = fun(Key) ->
                          mnesia:write(#test_tab{key = Key, val = rand:uniform()})
                  end,
    mria:transaction(
      test_shard,
      fun() ->
              lists:foreach(WriteRandom, lists:seq(1, 10)),
              mnesia:write(#test_tab{key = sum, val = sum_keys()}),
              true
      end).
%% Read-only consistency check: inside one `test_shard' read-only
%% transaction, compute the sum of keys 1..10, sleep `Delay'
%% milliseconds and compare the sum with the value stored under the
%% key `sum'. A mismatch fails the `?assertEqual' inside the
%% transaction. An empty table is accepted as-is.
do_trans_verify(Delay) ->
    Verify =
        fun() ->
                case mnesia:all_keys(test_tab) of
                    [] ->
                        %% The replica hasn't got any data yet, ignore.
                        %% FIXME: https://github.com/emqx/mria/issues/2
                        timer:sleep(Delay),
                        true;
                    _ ->
                        Sum = sum_keys(),
                        %% Keep the transaction open for a while before
                        %% reading the stored sum:
                        timer:sleep(Delay),
                        [#test_tab{val = Expected}] = mnesia:read(test_tab, sum),
                        ?assertEqual(Sum, Expected),
                        true
                end
        end,
    mria:ro_transaction(test_shard, Verify).

%% Sum of the values stored under the keys 1..10 of `test_tab'. Must
%% run inside a transaction; crashes with `badmatch' unless every key
%% holds exactly one record.
sum_keys() ->
    AddValue = fun(Key, Acc) ->
                       [#test_tab{val = Value}] = mnesia:read(test_tab, Key),
                       Acc + Value
               end,
    lists:foldl(AddValue, 0, lists:seq(1, 10)).
--------------------------------------------------------------------------------