├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── docs └── img │ └── schema.png ├── include └── zraft.hrl ├── priv └── zraft_lib.schema ├── rebar ├── rebar.config ├── src ├── zraft_backend.erl ├── zraft_client.erl ├── zraft_consensus.erl ├── zraft_dict_backend.erl ├── zraft_fs_log.erl ├── zraft_fsm.erl ├── zraft_lib.app.src ├── zraft_lib_app.erl ├── zraft_lib_sup.erl ├── zraft_log_util.erl ├── zraft_peer_proxy.erl ├── zraft_peer_route.erl ├── zraft_quorum_counter.erl ├── zraft_session.erl ├── zraft_session_obj.erl ├── zraft_snapshot_receiver.erl ├── zraft_snapshot_writer.erl └── zraft_util.erl ├── test ├── basic_zraft_progress.erl ├── full_zraft_progress.erl └── session_zraft_client.erl └── tools.mk /.gitignore: -------------------------------------------------------------------------------- 1 | .eunit 2 | deps 3 | *.o 4 | *.beam 5 | *.plt 6 | ebin 7 | .project 8 | .settings 9 | doc 10 | *.iml 11 | .rebar 12 | .idea 13 | .local_dialyzer_plt 14 | .combo_dialyzer_plt 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | REBAR = $(shell pwd)/rebar 2 | .PHONY: rel deps test 3 | 4 | all: deps compile 5 | 6 | ## 7 | ## Compilation targets 8 | ## 9 | 10 | deps: 11 | $(REBAR) get-deps 12 | 13 | compile: deps 14 | $(REBAR) compile 15 | 16 | clean: 17 | $(REBAR) clean 18 | 19 | distclean: clean 20 | $(REBAR) delete-deps 21 | 22 | DIALYZER_APPS = kernel stdlib sasl erts ssl tools os_mon runtime_tools crypto inets \ 23 | xmerl webtool eunit syntax_tools compiler mnesia public_key snmp 24 | 25 | include tools.mk 26 | 27 | typer: 28 | typer --annotate -I ../ --plt $(PLT) -r src 29 | 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # zraft_lib 2 | 3 | Erlang [raft consensus protocol](https://raftconsensus.github.io) implementation . 4 | 5 | Supported features: 6 | - Runtime membership reconfiguration. 7 | - Log truncation via snapshotting. 
8 | - Peer asynchronous RPC. 9 | - Pluggable state machine. 10 | - Optimistic log replication. 11 | - Snapshot transfer via kernel sendfile command. 12 | - Client sessions. 13 | - Temporary data (like ephemeral nodes) 14 | - Data change triggers. 15 | 16 | ## Erlang Architecture 17 | ![schema](docs/img/schema.png?raw=true) 18 | 19 | ## General Configuration 20 | Example app configuration file. 21 | ``` 22 | [{zraft_lib, 23 | [{snapshot_listener_port,0}, 24 | {election_timeout,500}, 25 | {request_timeout,1000}, 26 | {snapshot_listener_addr,"0,0,0,0"}, 27 | {snapshot_backup,false}, 28 | {log_dir,"./data"}, 29 | {snapshot_dir,"./data"}, 30 | {max_segment_size,10485760}, 31 | {max_log_count,1000}]}, 32 | {lager, 33 | [{error_logger_hwm,100}, 34 | {error_logger_redirect,true}, 35 | {crash_log_date,"$D0"}, 36 | {crash_log_size,10485760}, 37 | {crash_log_msg_size,65536}, 38 | {handlers, 39 | [{lager_file_backend, 40 | [{file,"./log/console.log"}, 41 | {level,info}, 42 | {size,10485760}, 43 | {date,"$D0"}, 44 | {count,5}]}, 45 | {lager_file_backend, 46 | [{file,"./log/error.log"}, 47 | {level,error}, 48 | {size,10485760}, 49 | {date,"$D0"}, 50 | {count,5}]}]}, 51 | {crash_log,"./log/crash.log"}, 52 | {crash_log_count,5}]}, 53 | {sasl,[{sasl_error_logger,false}]}]. 54 | ``` 55 | - "election_timeout" - Timeout(in ms) used by Follower to start new election process (default 500). 56 | - "request_timeout" - Timeout(in ms) used by Leader to wait replication RPC reply from Follower (default 2*election_timeout). 57 | - "snapshot_listener_port" - Default port used for snapshot transfer.(0 - any free port). 58 | - "snapshot_listener_addr" - Bind Address for accepting snapshot transfer connections. 59 | - "snapshot_backup" - If it turns on, all snapshot will be archived. 60 | - "log_dir" - Directory to store RAFT logs and metadata. 61 | - "snapshot_dir" - Directory to store snapshots. 
62 | - "max_segment_size" - Log segment Maximum size in bytes.(A new segment will be created after reaching that threshold.) 63 | - "max_log_count" - Snapshot/LogTruncation process will be started after every "max_log_count" entries applied. 64 | 65 | ## Create and Config RAFT Cluster. 66 | 67 | ``` 68 | zraft_client:create(Peers,BackEnd). 69 | ``` 70 | Parameters: 71 | - `Peers` - list of cluster peers, e.g. `[{test1,'test1@host1'},{test1,'test2@host2'},{other_test,'test3@host3'}]`. 72 | - `BackEnd` - module name used to apply user's requests. 73 | 74 | Possible return values: 75 | - `{ok,Peers}` - cluster has been created. 76 | - `{error,{PeerID,Error}}` - PeerID can't be created due to "Error". 77 | - `{error,[{PeerID,Error}]}` - Peers can't be created. 78 | - `{error,Reason}` - cluster has been created, but the application of new configuration has failed due to "Reason". 79 | 80 | 81 | ## Basic operations. 82 | 83 | #### Light Session Object. 84 | Light session object is used to track current raft cluster state, e.g. leader,failed peers, etc... 85 | 86 | Create session object by PeerID: 87 | 88 | ``` 89 | zraft_client:light_session(PeerID,FailTimeout,ElectionTimeout). 90 | ``` 91 | Parameters: 92 | - `PeerID` - ID of a peer from the cluster. 93 | - `FailTimeout` - If we detect that peer has failed,then we will not send any request to this peer during this Interval. 94 | - `ElectionTimeout` - If we detect that peer isn't a leader,then we will not send any request to this peer during this Interval. 95 | 96 | Possible return values: 97 | - `LightSession` - Light Session object. 98 | - `{error,Reason}` - Can't read cluster configuration. 99 | 100 | Create session by list of PeerIDs: 101 | ``` 102 | zraft_client:light_session(PeersList,FailTimeout,ElectionTimeout). 103 | ``` 104 | This function will not try to read the configuration from the cluster. 105 | 106 | #### Write operation. 107 | 108 | ``` 109 | zraft_client:write(PeerID,Data,Timeout). 
110 | 111 | ``` 112 | 113 | Parameters: 114 | - `PeerID` - PeerID. 115 | - `Data` - Request Data specific for BackEndModule. 116 | 117 | Return: 118 | - `{Result,LeaderPeerID}` - Result is result of applying Data to BackEndModule. LeaderPeerID is current leader ID. 119 | - `{error,Error}` - Operation has failed. Typical reason is timeout,noproc. 120 | 121 | Write using session object. 122 | ``` 123 | zraft_client:write(LightSessionObj,Data,Timeout). 124 | 125 | ``` 126 | 127 | Parameters: 128 | - `LightSessionObj` - Light Session Object. 129 | - `Data` - Request Data specific for BackEndModule. 130 | 131 | Return: 132 | - `{Result,LightSessionObj}` - Result is result of applying Data to BackEndModule. LightSessionObj is the updated session object. 133 | - `{error,Error}` - Operation has failed. Typical reason is timeout,all_failed. `all_failed` means,there are no alive peers. 134 | 135 | ``` 136 | WARNING: during this request Data may be applied to backend module twice. 137 | ``` 138 | 139 | #### Read request: 140 | 141 | ``` 142 | zraft_client:query(PeerID,Query,Timeout). 143 | 144 | ``` 145 | Parameters: 146 | - `PeerID` - PeerID. 147 | - `Query` - Request Data specific for backend module. 148 | 149 | Return: 150 | - `{Result,LeaderPeerID}` - Result is result of query. LeaderPeerID is current leader ID. 151 | - `{error,Error}` - Operation has failed. Typical reason is timeout,noproc. 152 | 153 | Or read data using light session object: 154 | 155 | ``` 156 | zraft_client:query(LightSessionObj,Query,Timeout). 157 | 158 | ``` 159 | 160 | Return: 161 | - `{Result,LightSessionObj}` - Result is result of query. LightSessionObj is the updated session object. 162 | - `{error,Error}` - Operation has failed. Typical reason is timeout,all_failed. `all_failed` means,there are no alive peers. 163 | 164 | 165 | #### Change Configuration: 166 | 167 | ``` 168 | zraft_client:set_new_conf(Peer,NewPeers,OldPeers,Timeout). 
169 | ``` 170 | 171 | ## Use Session: 172 | 173 | You can create a long-lived session to a RAFT cluster. It can be used for triggers and temporary data. 174 | 175 | ``` 176 | zraft_session:start_link(PeerOrPeers,SessionTimeout)->{ok,Session}. 177 | ``` 178 | 179 | If the first parameter is a PeerID, the other available peers will be read from that peer. 180 | 181 | #### Write Data and Ephemeral data. 182 | 183 | ``` 184 | zraft_session:write(Session,Data, Temporary, Timeout). 185 | ``` 186 | If Temporary is true then data will be deleted after the session expires. 187 | 188 | #### Read Data and Set watchers 189 | 190 | ``` 191 | zraft_session:query(Session,Query,Watch,Timeout). 192 | ``` 193 | 194 | Watch is a trigger reference that will be triggered after future changes. The trigger will be triggered only once; if you need a new trigger you must query the data again. 195 | 196 | Example: 197 | ``` 198 | zraft_session:query(S1,1,my_watcher,1000). 199 | %%Result = not_found. 200 | zraft_session:write(S2,{1,2},1000). 201 | receive 202 | {swatch_trigger,my_watcher,Reason}-> 203 | %%Data changed. Change Reason is data_changed or leader changed. 204 | ok 205 | end. 206 | zraft_session:query(S1,1,my_watcher,1000). %%watch again 207 | ``` 208 | 209 | 210 | 211 | 212 | 213 | 214 | ## Standalone Server. 215 | 216 | You can use it for tests from erlang console. 217 | 218 | https://github.com/dreyk/zraft 219 | 220 | 221 | ## TODO: 222 | - Write External API documentation. 223 | - Add backend based on ets table 224 | - Add "watcher" support (notify client about backend state changes). 
225 | 226 | 227 | -------------------------------------------------------------------------------- /docs/img/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dreyk/zraft_lib/ead65c45df576be3758639e3fe3a46edefdeae1d/docs/img/schema.png -------------------------------------------------------------------------------- /include/zraft.hrl: -------------------------------------------------------------------------------- 1 | 2 | -record(vote_request,{from,term,epoch,last_index,last_term}). 3 | -record(vote_reply,{from_peer,epoch,request_term,peer_term,granted,commit}). 4 | 5 | -record(append_entries, { 6 | term =0, 7 | epoch=0, 8 | from, 9 | request_ref, 10 | prev_log_index=0, 11 | prev_log_term=0, 12 | entries :: term(), 13 | commit_index=0}). 14 | -record(append_reply, { 15 | epoch, 16 | request_ref, 17 | term = 0, 18 | from_peer, 19 | last_index=0, 20 | success=false, 21 | agree_index=0 22 | }). 23 | 24 | -record(install_snapshot,{from,request_ref,term,epoch,index,data}). 25 | -record(install_snapshot_reply,{epoch,request_ref,term,from_peer,addr,port,result,index}). 26 | 27 | -define(UPDATE_CMD,update). 28 | -define(BECOME_LEADER_CMD,become_leader). 29 | -define(LOST_LEADERSHIP_CMD,lost_leadership). 30 | -define(OPTIMISTIC_REPLICATE_CMD,optimistic_replicate). 31 | -define(VOTE_CMD,vote). 32 | 33 | 34 | -define(OP_CONFIG,1). 35 | -define(OP_DATA,2). 36 | -define(OP_NOOP,3). 37 | 38 | -define(BLANK_CONF,blank). 39 | -define(STABLE_CONF,stable). 40 | -define(STAGING_CONF,staging). 41 | -define(TRANSITIONAL_CONF,transactional). 42 | 43 | -define(ELECTION_TIMEOUT_PARAM,election_timeout). 44 | -define(ELECTION_TIMEOUT,500). 45 | 46 | -define(CLIENT_PING,'$zraftc_ping'). 47 | -define(CLIENT_CONNECT,'$zraftc_connect'). 48 | -define(CLIENT_CLOSE,'$zraftc_close'). 49 | -define(EXPIRE_SESSION,'$zraft_expire'). 50 | -define(DISCONNECT_MSG, disconnected). 
51 | 52 | -record(snapshot_info,{index=0,term=0,conf_index=0,conf=?BLANK_CONF}). 53 | 54 | -record(log_op_result,{log_state,last_conf,result}). 55 | 56 | -record(entry,{index,term,type,data,global_time=0}). 57 | 58 | -record(pconf,{old_peers=[],new_peers=[]}). 59 | 60 | -record(raft_meta,{id,voted_for,current_term=0,back_end}). 61 | 62 | -record(peer,{id,next_index=1,has_vote=false,last_agree_index=0,epoch=0}). 63 | 64 | -record(log_descr,{first_index,last_index,last_term,commit_index}). 65 | 66 | -record(read,{from,request,watch=false,global_time}). 67 | 68 | -record(fsm_stat,{session_number,watcher_number,tmp_count}). 69 | -record(peer_start,{epoch,term,allow_commit,leader,back_end,log_state,snapshot_info,conf,conf_state,state_name,proxy_peer_stats=[],fsm_stat}). 70 | -record(proxy_peer_stat,{peer_state,is_snapshoting}). 71 | -record(swrite,{data,message_id,acc_upto,from,temporary=false}).%%write in session 72 | -record(write,{data,from}).%%optimistic write 73 | 74 | -record(swrite_reply,{sequence,data}). 75 | -record(swrite_error,{sequence,leader,error}). 76 | -record(sread_reply,{data,ref}). 77 | -record(swatch_trigger,{ref,reason}). 78 | 79 | -ifdef(TEST). 80 | -include_lib("eunit/include/eunit.hrl"). 81 | -define(MINFO(S, As), ?debugFmt("[INFO] " ++ S, As)). 82 | -define(MINFO(S), ?debugMsg("[INFO] " ++ S)). 83 | -define(MWARNING(S, As), ?debugFmt("[WARNING] " ++ S, As)). 84 | -define(MWARNING(S), ?debugMsg("[WARNING] " ++ S)). 85 | -define(MERROR(S, As), ?debugFmt("[ERROR] " ++ S, As)). 86 | -define(MERROR(S), ?debugMsg("[ERROR] " ++ S)). 87 | -define(MDEBUG(S, As), ?debugFmt("[DEBUG] " ++ S, As)). 88 | -define(MDEBUG(S), ?debugMsg("[DEBUG] " ++ S)). 89 | -else. 90 | -define(MINFO(S, As), lager:info(S, As)). 91 | -define(MINFO(S), lager:info(S)). 92 | -define(MWARNING(S, As), lager:warning(S, As)). 93 | -define(MWARNING(S), lager:warning(S)). 94 | -define(MERROR(S, As), lager:error(S, As)). 95 | -define(MERROR(S), lager:error(S)). 
96 | -define(MDEBUG(S, As), lager:debug(S, As)). 97 | -define(MDEBUG(S), lager:debug(S)). 98 | -endif. -------------------------------------------------------------------------------- /priv/zraft_lib.schema: -------------------------------------------------------------------------------- 1 | %%-*- mode: erlang -*- 2 | 3 | %% @doc Where to emit the default log messages (typically at 'info' 4 | %% severity): 5 | %% off: disabled 6 | %% file: the file specified by log.console.file 7 | %% console: to standard output (seen when using `zraft attach-direct`) 8 | %% both: log.console.file and standard out. 9 | {mapping, "log.console", "lager.handlers", [ 10 | {default, {{console_log_default}} }, 11 | {datatype, {enum, [off, file, console, both]}} 12 | ]}. 13 | 14 | %% @doc The severity level of the console log, default is 'info'. 15 | {mapping, "log.console.level", "lager.handlers", [ 16 | {default, info}, 17 | {datatype, {enum, [debug, info, notice, warning, error, critical, alert, emergency, none]}} 18 | ]}. 19 | 20 | %% @doc When 'log.console' is set to 'file' or 'both', the file where 21 | %% console messages will be logged. 22 | {mapping, "log.console.file", "lager.handlers", [ 23 | {default, "$(platform_log_dir)/console.log"}, 24 | {datatype, file} 25 | ]}. 26 | 27 | %% @doc The file where error messages will be logged. 28 | {mapping, "log.error.file", "lager.handlers", [ 29 | {default, "$(platform_log_dir)/error.log"}, 30 | {datatype, file} 31 | ]}. 
32 | 33 | 34 | {translation, 35 | "lager.handlers", 36 | fun(Conf) -> 37 | ErrorHandler = case cuttlefish:conf_get("log.error.file", Conf) of 38 | undefined -> []; 39 | ErrorFilename -> [{lager_file_backend, [{file, ErrorFilename}, 40 | {level, error}, 41 | {size, 10485760}, 42 | {date, "$D0"}, 43 | {count, 5}]}] 44 | end, 45 | 46 | ConsoleLogLevel = cuttlefish:conf_get("log.console.level", Conf), 47 | ConsoleLogFile = cuttlefish:conf_get("log.console.file", Conf), 48 | 49 | ConsoleHandler = {lager_console_backend, ConsoleLogLevel}, 50 | ConsoleFileHandler = {lager_file_backend, [{file, ConsoleLogFile}, 51 | {level, ConsoleLogLevel}, 52 | {size, 10485760}, 53 | {date, "$D0"}, 54 | {count, 5}]}, 55 | 56 | ConsoleHandlers = case cuttlefish:conf_get("log.console", Conf) of 57 | off -> []; 58 | file -> [ConsoleFileHandler]; 59 | console -> [ConsoleHandler]; 60 | both -> [ConsoleHandler, ConsoleFileHandler]; 61 | _ -> [] 62 | end, 63 | ConsoleHandlers ++ ErrorHandler 64 | end 65 | }. 66 | 67 | 68 | %% @doc Whether to enable Erlang's built-in error logger. 69 | {mapping, "sasl", "sasl.sasl_error_logger", [ 70 | {default, off}, 71 | {datatype, flag}, 72 | hidden 73 | ]}. 74 | 75 | %% @doc Whether to enable the crash log. 76 | {mapping, "log.crash", "lager.crash_log", [ 77 | {default, on}, 78 | {datatype, flag} 79 | ]}. 80 | 81 | %% @doc If the crash log is enabled, the file where its messages will 82 | %% be written. 83 | {mapping, "log.crash.file", "lager.crash_log", [ 84 | {default, "$(platform_log_dir)/crash.log"}, 85 | {datatype, file} 86 | ]}. 87 | 88 | {translation, 89 | "lager.crash_log", 90 | fun(Conf) -> 91 | case cuttlefish:conf_get("log.crash", Conf) of 92 | false -> undefined; 93 | _ -> 94 | cuttlefish:conf_get("log.crash.file", Conf, "{{platform_log_dir}}/crash.log") 95 | end 96 | end}. 
97 | 98 | %% @doc Maximum size in bytes of individual messages in the crash log 99 | {mapping, "log.crash.maximum_message_size", "lager.crash_log_msg_size", [ 100 | {default, "64KB"}, 101 | {datatype, bytesize} 102 | ]}. 103 | 104 | %% @doc Maximum size of the crash log in bytes, before it is rotated 105 | {mapping, "log.crash.size", "lager.crash_log_size", [ 106 | {default, "10MB"}, 107 | {datatype, bytesize} 108 | ]}. 109 | 110 | %% @doc The schedule on which to rotate the crash log. For more 111 | %% information see: 112 | %% https://github.com/basho/lager/blob/master/README.md#internal-log-rotation 113 | {mapping, "log.crash.rotation", "lager.crash_log_date", [ 114 | {default, "$D0"} 115 | ]}. 116 | 117 | %% @doc The number of rotated crash logs to keep. When set to 118 | %% 'current', only the current open log file is kept. 119 | {mapping, "log.crash.rotation.keep", "lager.crash_log_count", [ 120 | {default, 5}, 121 | {datatype, [integer, {atom, current}]}, 122 | {validators, ["rotation_count"]} 123 | ]}. 124 | 125 | {validator, 126 | "rotation_count", 127 | "must be 'current' or a positive integer", 128 | fun(current) -> true; 129 | (Int) when is_integer(Int) andalso Int >= 0 -> true; 130 | (_) -> false 131 | end}. 132 | 133 | {translation, 134 | "lager.crash_log_count", 135 | fun(Conf) -> 136 | case cuttlefish:conf_get("log.crash.rotation.keep", Conf) of 137 | current -> 0; 138 | Int -> Int 139 | end 140 | end}. 141 | 142 | %% @doc Whether to redirect error_logger messages into lager - 143 | %% defaults to true 144 | {mapping, "log.error.redirect", "lager.error_logger_redirect", [ 145 | {default, on}, 146 | {datatype, flag}, 147 | hidden 148 | ]}. 149 | 150 | %% @doc Maximum number of error_logger messages to handle in a second 151 | {mapping, "log.error.messages_per_second", "lager.error_logger_hwm", [ 152 | {default, 100}, 153 | {datatype, integer}, 154 | hidden 155 | ]}. 156 | 157 | 158 | %% @doc Cookie for distributed node communication. 
All nodes in the 159 | %% same cluster should use the same cookie or they will not be able to 160 | %% communicate. 161 | {mapping, "distributed_cookie", "vm_args.-setcookie", [ 162 | {default, "zfat_secret"} 163 | ]}. 164 | 165 | {mapping, "erlang.process_limit", "vm_args.+P", [ 166 | {datatype, integer}, 167 | {default, 100000000} 168 | ]}. 169 | 170 | %% override zdbbl from 1mb to 32mb 171 | {mapping, "erlang.distribution_buffer_size", "vm_args.+zdbbl", [ 172 | {default, "32MB"}, 173 | merge 174 | ]}. 175 | 176 | %% VM scheduler collapse, part 1 of 2 177 | {mapping, "erlang.schedulers.force_wakeup_interval", "vm_args.+sfwi", [ 178 | {default, 500}, 179 | {datatype, integer}, 180 | merge 181 | ]}. 182 | 183 | %% VM scheduler collapse, part 2 of 2 184 | {mapping, "erlang.schedulers.compaction_of_load", "vm_args.+scl", [ 185 | {default, false}, 186 | merge 187 | ]}. 188 | 189 | 190 | %% @doc erlang vm shutdown_time is useful when running a riak_test devrel 191 | {mapping, "erlang.shutdown_time", "vm_args.-shutdown_time", [ 192 | {default, "10s"}, 193 | {datatype, {duration, ms}} 194 | ]}. 195 | 196 | %% @doc Election timeout. 197 | {mapping, "zraft.election-timeout", "zraft_lib.election_timeout", [ 198 | {default,"500ms"}, 199 | {datatype, {duration, ms}} 200 | ]}. 201 | 202 | %% @doc Request timeout. Default is double election timeout. 203 | {mapping, "zraft.request-timeout", "zraft_lib.request_timeout", [ 204 | {datatype, {duration, ms}} 205 | ]}. 206 | 207 | %% @doc Log entry count before snapshot. 208 | {mapping, "zraft.snapshot-threshold", "zraft_lib.max_log_count", [ 209 | {default,1000}, 210 | {datatype,integer} 211 | ]}. 212 | 213 | %% @doc Maximum size of raft log file in bytes. 214 | {mapping, "zraft.log-segment-size", "zraft_lib.max_segment_size", [ 215 | {default,10485760}, 216 | {datatype,integer} 217 | ]}. 218 | 219 | %% @doc Directory to write snapshots. 
220 | {mapping, "zraft.raft-snapshot-dir", "zraft_lib.snapshot_dir", [ 221 | {default,"$(platform_data_dir)"}, 222 | {datatype,file} 223 | ]}. 224 | 225 | %% @doc Directory to write raft log files. 226 | {mapping, "zraft.raft-log-dir", "zraft_lib.log_dir", [ 227 | {default,"$(platform_data_dir)"}, 228 | {datatype,file} 229 | ]}. 230 | 231 | %% @doc Whether to enable backup snapshot. 232 | {mapping, "zraft.snapshot-backup", "zraft_lib.snapshot_backup", [ 233 | {default, off}, 234 | {datatype, flag} 235 | ]}. 236 | 237 | %% @doc Address that will be used for snapshot receive. 238 | {mapping, "zraft.snapshot-listen-if", "zraft_lib.snapshot_listener_addr", [ 239 | {default,"0.0.0.0"} 240 | ]}. 241 | 242 | %% @doc Port that will be used for snapshot receive. Default 0 - any free port. 243 | {mapping, "zraft.snapshot-listen-port", "zraft_lib.snapshot_listener_port", [ 244 | {default,0}, 245 | {datatype,integer} 246 | ]}. 247 | 248 | %% @doc Platform-specific installation paths (substituted by rebar) 249 | {mapping, "platform_bin_dir", "zraft_lib.platform_bin_dir", [ 250 | {datatype, directory}, 251 | {default, "./bin"} 252 | ]}. 253 | 254 | %% @see platform_bin_dir 255 | {mapping, "platform_data_dir", "zraft_lib.platform_data_dir", [ 256 | {datatype, directory}, 257 | {default, "./data"} 258 | ]}. 259 | 260 | %% @see platform_bin_dir 261 | {mapping, "platform_etc_dir", "zraft_lib.platform_etc_dir", [ 262 | {datatype, directory}, 263 | {default, "./etc"} 264 | ]}. 265 | 266 | %% @see platform_bin_dir 267 | {mapping, "platform_lib_dir", "zraft_lib.platform_lib_dir", [ 268 | {datatype, directory}, 269 | {default, "./lib"} 270 | ]}. 271 | 272 | %% @see platform_bin_dir 273 | {mapping, "platform_log_dir", "zraft_lib.platform_log_dir", [ 274 | {datatype, directory}, 275 | {default, "./log"} 276 | ]}. 
-------------------------------------------------------------------------------- /rebar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dreyk/zraft_lib/ead65c45df576be3758639e3fe3a46edefdeae1d/rebar -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {erl_opts, [{parse_transform, lager_transform}, debug_info, warnings_as_errors]}. 2 | {cover_enabled, true}. 3 | {eunit_opts, [verbose, {report, {eunit_surefire, [{dir, "."}]}}]}. 4 | {eunit_compile_opts, [{parse_transform, lager_transform}, debug_info]}. 5 | {edoc_opts, [{preprocess, true}]}. 6 | {deps, [ 7 | {lager, "2.2.3", {git, "git://github.com/basho/lager.git", {tag, "2.2.3"}}}, 8 | {meck, ".*", {git, "git://github.com/eproxus/meck.git", {tag,"0.8.2"}}} 9 | ]}. 10 | {xref_checks, []}. 11 | {xref_queries, [{"(XC - UC) || (XU - X - B - \"(dtrace)\" : Mod)", []}]}. 12 | -------------------------------------------------------------------------------- /src/zraft_backend.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% @author Gunin Alexander 3 | %% Copyright (c) 2015 Gunin Alexander. All Rights Reserved. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. 
See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_backend).
-author("dreyk").

%% API
-export([]).

%% Opaque backend state; its shape is chosen by the implementing module.
-type state() :: term().
%% Command passed to query/2.
-type read_cmd() :: term().
%% Command passed to apply_data/2,3.
-type write_cmd() :: term().
%% Function that writes a snapshot to the given location.
-type snapshot_fun() :: fun((file:filename()) -> ok).
%% List of keys reported back alongside a result.
-type keys() :: list(term()).

%% Initialise the backend FSM.
-callback init(PeerId :: zraft_consensus:peer_id()) -> state().

%% Read/query data from the FSM.
%% The three-element form additionally returns a list of keys.
-callback query(ReadCmd :: read_cmd(), State :: state()) ->
    {ok, Data :: term()} | {ok, WatchKeys :: keys(), Data :: term()}.

%% Write data to the FSM.
-callback apply_data(WriteCmd :: write_cmd(), State :: state()) ->
    {Result, State :: state()} | {Result, TriggerKeys :: keys(), State :: state()}
    when Result :: term().

%% Write data to the FSM on behalf of a client session.
-callback apply_data(WriteCmd :: write_cmd(), Session :: zraft_consensus:csession(), State :: state()) ->
    {Result, State :: state()} | {Result, TriggerKeys :: keys(), State :: state()}
    when Result :: term().

%% Notify the FSM that a client session has expired.
-callback expire_session(Session :: zraft_consensus:csession(), State :: state()) ->
    {ok, State :: state()} | {ok, TriggerKeys :: keys(), State :: state()}.

%% Prepare the FSM to take a snapshot asynchronously if possible; otherwise
%% return a function that takes the snapshot immediately.
-callback snapshot(State :: state()) ->
    {sync, snapshot_fun(), state()} | {async, snapshot_fun(), state()}.

%% Notify that the snapshot has been done.
-callback snapshot_done(State :: state()) -> {ok, NewState :: state()}.

%% Notify that the snapshot has failed.
-callback snapshot_failed(Reason :: term(), State :: state()) -> {ok, state()}.

%% Read data from a snapshot file or directory.
%% The error reason is any term (the original spec named the atom `term'
%% instead of the type `term()').
-callback install_snapshot(FileName :: file:filename(), State :: state() | undefined) ->
    {ok, state()} | {error, Reason :: term()}.

--------------------------------------------------------------------------------
/src/zraft_client.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_client).
-author("dreyk").

%% API
-export([
    query/3,
    write/3,
    get_conf/1,
    get_conf/2,
    light_session/1,
    light_session/3,
    create/2,
    create/3,
    set_new_conf/4,
    check_exists/1
]).

-export_type([
    apply_conf_error/0
]).

-include("zraft.hrl").

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.


%% Default timeout (ms) used by get_conf/1.
-define(TIMEOUT, 5000).
%% Timeout (ms) used when applying the initial configuration in create/3.
-define(CREATE_TIMEOUT, 5000).
-define(BACKOFF, 3000).


%%%===================================================================
%%% Read/Write
%%%===================================================================
-spec light_session(Conf) -> zraft_session_obj:light_session() | {error, Reason} when
    Conf :: list(zraft_consensus:peer_id()) | zraft_consensus:peer_id(),
    Reason :: no_peers | term().
%% @doc Create a light session for read/write operations.
%% The back-off is twice the election timeout.
%% @equiv light_session(Conf,zraft_consensus:get_election_timeout()*2,zraft_consensus:get_election_timeout())
%% @end
light_session(Conf) ->
    ElectionTimeout = zraft_consensus:get_election_timeout(),
    light_session(Conf, ElectionTimeout * 2, ElectionTimeout).

-spec light_session(Conf, BackOff, Election) -> zraft_session_obj:light_session() | {error, Reason} when
    Conf :: list(zraft_consensus:peer_id()) | zraft_consensus:peer_id(),
    BackOff :: timeout(),
    Election :: timeout(),
    Reason :: no_peers | term().
%% @doc Create a light session for read/write operations.
%%
%% Use it to create an object that will be used to batch read/write operations.
%%
%% If Conf is a single PeerID then the quorum configuration is read from that
%% peer first. In that case an error may be returned.
%%
%% You must create a new object once you know that the quorum configuration
%% has been changed.
%%
%% If Conf is an empty list then {error,no_peers} is returned.
%% @end
light_session([], _BackOff, _Election) ->
    %% Nothing to talk to.
    {error, no_peers};
light_session([_ | _] = PeerList, BackOff, Election) ->
    %% Explicit peer list: no leader is known yet.
    zraft_session_obj:create(PeerList, BackOff, Election);
light_session(SinglePeer, BackOff, Election) ->
    %% Single peer: discover the leader and the peer list through it.
    case get_conf(SinglePeer) of
        {ok, {Leader, PeerList}} ->
            Session = zraft_session_obj:create(PeerList, BackOff, Election),
            zraft_session_obj:set_leader(Leader, Session);
        Failure ->
            Failure
    end.

-spec query(Raft, Query, Timeout) -> {Result, NewRaftConf} | RuntimeError when
    Raft :: zraft_session_obj:light_session() | zraft_consensus:peer_id(),
    Query :: term(),
    Timeout :: timeout(),
    Result :: term(),
    NewRaftConf :: zraft_session_obj:light_session() | zraft_consensus:peer_id(),
    RuntimeError :: {error, timeout} | {error, noproc}.
%% @doc Read data from the state machine.
%%
%% The value of the Query parameter depends on the backend type used for the state machine.
%%
%% If Raft is a single peer then the data is read from it. The request is redirected to the leader
%% if that peer is a follower or a candidate. If the peer is unreachable or is going down the request
%% fails with {error,noproc}.
%%
%% If Raft is a light session object and the current leader is going down, the request is retried on
%% another peer and so on until a response is received or the timeout expires.
%% @end
query(Raft, Query, Timeout) ->
    peer_execute(
        Raft,
        fun(PeerID) -> zraft_consensus:query(PeerID, Query, Timeout) end,
        Timeout).

-spec write(Raft, Data, Timeout) -> {Result, NewRaftConf} | RuntimeError when
    Raft :: zraft_session_obj:light_session() | zraft_consensus:peer_id(),
    Data :: term(),
    Timeout :: timeout(),
    Result :: term(),
    NewRaftConf :: zraft_session_obj:light_session() | zraft_consensus:peer_id(),
    RuntimeError :: {error, timeout} | {error, noproc}.
%% @doc Write data to the state machine.
%%
%% The value of the Data parameter depends on the backend type used for the state machine.
%%
%% If Raft is a single peer then the data is written through it. The request is redirected to the leader
%% if that peer is a follower or a candidate. If the peer is unreachable or is going down the request
%% fails with {error,noproc}.
%%
%% If Raft is a light session object and the current leader is going down, the request is retried on
%% another peer and so on until a response is received or the timeout expires.
%% @end
write(Raft, Data, Timeout) ->
    peer_execute(
        Raft,
        fun(PeerID) -> zraft_consensus:write(PeerID, Data, Timeout) end,
        Timeout).


%%%===================================================================
%%% Configuration
%%%===================================================================
-spec get_conf(PeerID) -> {ok, {Leader, Peers}} | {error, term()} when
    PeerID :: zraft_consensus:peer_id(),
    Leader :: zraft_consensus:peer_id(),%%Current leader
    Peers :: list(zraft_consensus:peer_id()).
%% @doc Read the raft consensus configuration.
%% @equiv get_conf(PeerID,5000)
%% @end
get_conf(PeerID) ->
    get_conf(PeerID, ?TIMEOUT).

-spec get_conf(PeerID, Timeout) -> {ok, {Leader, Peers}} | {error, term()} when
    PeerID :: zraft_consensus:peer_id(),
    Timeout :: timeout(),
    Leader :: zraft_consensus:peer_id(),
    Peers :: list(zraft_consensus:peer_id()).
%% @doc Read the raft consensus configuration.
%%
%% PeerID may be any peer in the quorum. If it is not the leader, the request is redirected to the
%% current leader. If the leader loses leadership or goes down during execution, a runtime error or
%% {error,timeout} may be returned. In that case you may retry the request.
%% @end
get_conf(PeerID, Timeout) ->
    drop_conf_index(wait_stable_conf(PeerID, Timeout)).

%% @private
%% Strip the configuration index from a successful wait_stable_conf/2 reply;
%% anything else is passed through untouched.
drop_conf_index({ok, {Leader, _Index, Peers}}) ->
    {ok, {Leader, Peers}};
drop_conf_index(Failure) ->
    Failure.

-type apply_conf_error() :: leader_changed|not_stable|newer_exists|process_prev_change|timeout.
%%
-spec set_new_conf(Peer, NewPeers, OldPeers, Timeout) -> Result when
    Peer :: zraft_consensus:peer_id(),
    NewPeers :: list(zraft_consensus:peer_id()),
    OldPeers :: list(zraft_consensus:peer_id()),
    Timeout :: timeout(),
    Result :: {ok, list(zraft_consensus:peer_id())} | {error, apply_conf_error() | peers_changed | term()}.
%% @doc Replace the quorum configuration OldPeers with NewPeers.
%%
%% The current stable configuration is read first (following leader redirects):
%%
%% 1. If it already equals NewPeers, {ok,NewPeers} is returned without any change.
%%
%% 2. If it equals OldPeers, the new configuration is submitted to the leader.
%%    {error,leader_changed} is returned when the leader answers with a redirect.
%%
%% 3. Otherwise {error,peers_changed} is returned.
%%
%% Peer lists are compared as ordered sets.
%% NOTE(review): this presumes the peers reported by zraft_consensus:get_conf/2
%% are already an ordset - confirm against zraft_consensus.
%% @end
set_new_conf(PeerID, NewPeers, OldPeers, Timeout) ->
    NewSorted = ordsets:from_list(NewPeers),
    case wait_stable_conf(PeerID, Timeout) of
        {ok, {_Leader, _Index, NewSorted}} ->
            %% NewSorted is already bound: this clause matches only when the
            %% requested configuration is the one currently in effect.
            {ok, NewPeers};
        {ok, {Leader, Index, HasPeers}} ->
            case ordsets:from_list(OldPeers) of
                HasPeers ->
                    case catch zraft_consensus:set_new_configuration(Leader, Index, NewSorted, Timeout) of
                        ok ->
                            {ok, NewPeers};
                        {leader, _NewLeader} ->
                            {error, leader_changed};
                        Else ->
                            format_error(Else)
                    end;
                _ ->
                    %% The caller's view of the current configuration is stale.
                    {error, peers_changed}
            end;
        Else ->
            Else
    end.
%%%===================================================================
%%% Create new quorum
%%%===================================================================
-spec create(Peers, BackEnd) -> {ok, ResultPeers} | {error, term()} when
    Peers :: list(zraft_consensus:peer_id()),
    BackEnd :: module(),
    ResultPeers :: list(zraft_consensus:peer_id()).
%% @doc Create a new quorum.
%%
%% The quorum is bootstrapped on the node of the first peer: locally when that
%% node is the current one, through rpc otherwise. An rpc failure is reported
%% as {error,Reason}. An empty peer list yields {error,no_peers}.
%% @equiv create(lists:nth(1,Peers),Peers,UseBackend)
%% @end
create([], _UseBackend) ->
    {error, no_peers};
create([FirstPeer | _] = Peers, UseBackend) ->
    case FirstPeer of
        {_, Node} when Node =:= node() ->
            create(FirstPeer, Peers, UseBackend);
        {_, Node} ->
            case rpc:call(Node, ?MODULE, create, [FirstPeer, Peers, UseBackend]) of
                {badrpc, Reason} ->
                    %% Keep the documented {error, term()} shape, as
                    %% check_exists/1 does for its own rpc call.
                    {error, Reason};
                Result ->
                    Result
            end
    end.
-spec create(FirstPeer, Peers, BackEnd) -> {ok, ResultPeers} | {error, StartError | StartErrors} | {error, ApplyConfError} when
    FirstPeer :: zraft_consensus:peer_id(),
    Peers :: list(zraft_consensus:peer_id()),
    BackEnd :: module(),
    ResultPeers :: list(zraft_consensus:peer_id()),
    StartError :: {zraft_consensus:peer_id(), Reason :: term()},
    StartErrors :: list(StartError),
    ApplyConfError :: apply_conf_error() | term().
%% @doc Create a new quorum.
%%
%% Every peer is first checked with check_exists/1. Then all peers are started,
%% FirstPeer is bootstrapped and the full configuration is applied to it.
%%
%% It returns an error in the following cases:
%%
%% 1. Some peer already exists: {error,[{Peer,Reason}]} listing every such peer.
%%
%% 2. Some peer cannot be started: {error,{Peer,Reason}} for the first failure.
%%
%% 3. The new configuration cannot be applied to the peers.
%% @end
create(FirstPeer, AllPeers, UseBackend) ->
    case existing_peers(AllPeers) of
        [] ->
            case start_peers(UseBackend, AllPeers) of
                ok ->
                    case catch zraft_consensus:initial_bootstrap(FirstPeer) of
                        ok ->
                            %% After bootstrap the quorum consists of FirstPeer only.
                            set_new_conf(FirstPeer, AllPeers, [FirstPeer], ?CREATE_TIMEOUT);
                        Else ->
                            format_error(Else)
                    end;
                Else ->
                    Else
            end;
        Errors ->
            {error, Errors}
    end.

%% @private
%% Collect {Peer, Reason} for every peer rejected by check_exists/1.
%% The result is in reverse order of Peers.
existing_peers(Peers) ->
    lists:foldl(
        fun(P, Acc) ->
            case check_exists(P) of
                ok ->
                    Acc;
                {error, Error} ->
                    [{P, Error} | Acc]
            end
        end, [], Peers).

-spec start_peers(module(), list(zraft_consensus:peer_id())) ->
    ok | {error, {zraft_consensus:peer_id(), term()}}.
%% @private
%% Start the peers one by one, locally or through rpc depending on the peer's
%% node, and stop at the first failure.
start_peers(UseBackEnd, [P | T]) ->
    Result = case P of
        {_Name, Node} when Node =:= node() ->
            zraft_lib_sup:start_consensus(P, UseBackEnd);
        {_Name, Node} ->
            rpc:call(Node, zraft_lib_sup, start_consensus, [P, UseBackEnd])
    end,
    case Result of
        {ok, _} ->
            start_peers(UseBackEnd, T);
        {error, already_present} ->
            {error, {P, already_present}};
        {error, {'already_started', _}} ->
            {error, {P, already_present}};
        {badrpc, Error} ->
            {error, {P, Error}};
        {error, Reason} ->
            %% zraft_lib_sup:start_consensus/2 reports an existing child as
            %% already_created, which arrives here unchanged.
            {error, {P, Reason}}
    end;
start_peers(_UseBackEnd, []) ->
    ok.

-spec check_exists(zraft_consensus:peer_id()) -> ok | {error, already_present | term()}.
%% @doc Check that a peer has neither a data directory nor a registered process.
%%
%% For a local peer, {error,already_present} is returned when its directory
%% under log_dir can be listed or when its name is registered. For a remote
%% peer the check runs on the peer's node; an rpc failure gives {error,Reason}.
%% @end
check_exists(Peer = {Name, Node}) when Node =:= node() ->
    PeerDir = filename:join([zraft_util:get_env(log_dir, "data"), zraft_util:peer_name(Peer)]),
    case file:list_dir(PeerDir) of
        {ok, _} ->
            {error, already_present};
        _ ->
            case erlang:whereis(Name) of
                P when is_pid(P) ->
                    {error, already_present};
                _ ->
                    ok
            end
    end;
check_exists(Peer = {_Name, Node}) ->
    case rpc:call(Node, ?MODULE, check_exists, [Peer]) of
        {badrpc, Error} ->
            {error, Error};
        Result ->
            Result
    end.
%%%===================================================================
%%% Private
%%%===================================================================

%% @private
%% Dispatch a request either through a light session object or to a single peer.
peer_execute(Raft, Fun, Timeout) ->
    Start = os:timestamp(),
    case zraft_session_obj:is_session(Raft) of
        true ->
            peer_execute_sessions(Raft, Fun, Start, Timeout);
        _ ->
            peer_execute(Raft, Fun, Start, Timeout)
    end.
%% @private
%% Run Fun against a single peer. A {leader,NewLeader} reply redirects the
%% request to NewLeader and {error,loading} repeats it on the same peer.
%% Retries stop with {error,timeout} once Timeout has elapsed since Start.
%% Start is passed on unchanged so that the deadline covers the whole request
%% rather than each individual attempt.
peer_execute(PeerID, Fun, Start, Timeout) ->
    case catch Fun(PeerID) of
        {ok, Result} ->
            {Result, PeerID};
        {leader, NewLeader} ->
            retry_peer_execute(NewLeader, Fun, Start, Timeout);
        {error, loading} ->
            retry_peer_execute(PeerID, Fun, Start, Timeout);
        Else ->
            format_error(Else)
    end.

%% @private
%% Repeat peer_execute/4 unless the request deadline has passed.
retry_peer_execute(PeerID, Fun, Start, Timeout) ->
    case zraft_util:is_expired(Start, Timeout) of
        true ->
            {error, timeout};
        {false, _Timeout1} ->
            peer_execute(PeerID, Fun, Start, Timeout)
    end.

%% @private
%% Run Fun against the leader recorded in a light session object, updating
%% the session on redirects and failures until a result is obtained, the
%% session reports an error, or Timeout has elapsed since Start.
peer_execute_sessions(Session, Fun, Start, Timeout) ->
    Leader = zraft_session_obj:leader(Session),
    Next = case catch Fun(Leader) of
        {ok, Result} ->
            {Result, Session};
        {leader, NewLeader} when NewLeader /= undefined ->
            case zraft_session_obj:change_leader(NewLeader, Session) of
                {error, etimeout} ->
                    %% Wait one election timeout before trying again.
                    timer:sleep(zraft_consensus:get_election_timeout()),
                    {continue, Session};
                {error, all_failed} ->
                    {error, all_failed};
                Session1 ->
                    {continue, Session1}
            end;
        {error, loading} ->
            {continue, Session};
        _Else ->
            %% Any other reply (including {leader,undefined} and exits) marks
            %% the current peer as failed in the session.
            case zraft_session_obj:fail(Session) of
                {error, Err} ->
                    {error, Err};
                Session2 ->
                    {continue, Session2}
            end
    end,
    case Next of
        {continue, NextSession} ->
            case zraft_util:is_expired(Start, Timeout) of
                true ->
                    {error, timeout};
                {false, _Timeout1} ->
                    peer_execute_sessions(NextSession, Fun, Start, Timeout)
            end;
        Else ->
            Else
    end.


%% @private
%% Read the configuration starting at Peer, with no fallback peers yet.
wait_stable_conf(Peer, Timeout) ->
    wait_stable_conf(Peer, [], os:timestamp(), Timeout).

%% @private
%% Ask Peer for the quorum configuration until a usable answer arrives or
%% Timeout has elapsed since Start. FallBack holds the peers that redirected
%% us; they are tried again, most recent first, when a peer cannot be read.
wait_stable_conf(Peer, FallBack, Start, Timeout) ->
    case zraft_util:is_expired(Start, Timeout) of
        true ->
            {error, timeout};
        {false, _Remaining} ->
            case catch zraft_consensus:get_conf(Peer, Timeout) of
                {leader, undefined} ->
                    %% No leader is known yet.
                    pause_and_retry_conf(Peer, FallBack, Start, Timeout);
                {leader, Redirect} ->
                    wait_stable_conf(Redirect, [Peer | FallBack], Start, Timeout);
                {error, loading} ->
                    pause_and_retry_conf(Peer, FallBack, Start, Timeout);
                {ok, {0, _}} ->
                    %% A configuration with index 0 is treated as not ready.
                    pause_and_retry_conf(Peer, FallBack, Start, Timeout);
                {ok, {ConfIndex, ConfPeers}} ->
                    {ok, {Peer, ConfIndex, ConfPeers}};
                retry ->
                    pause_and_retry_conf(Peer, FallBack, Start, Timeout);
                Failure ->
                    lager:error("Can'r read conf from ~p:~p",[Peer,Failure]),
                    case FallBack of
                        [] ->
                            format_error(Failure);
                        [Previous | Older] ->
                            wait_stable_conf(Previous, Older, Start, Timeout)
                    end
            end
    end.

%% @private
%% Sleep for one election timeout, then ask the same peer again.
pause_and_retry_conf(Peer, FallBack, Start, Timeout) ->
    timer:sleep(zraft_consensus:get_election_timeout()),
    wait_stable_conf(Peer, FallBack, Start, Timeout).

%% @private
%% Normalise a failure into an {error, Reason} tuple; a caught exit is
%% reported as noproc.
format_error(Failure) ->
    case Failure of
        {'EXIT', _Reason} ->
            {error, noproc};
        {error, _} ->
            Failure;
        _ ->
            {error, Failure}
    end.

--------------------------------------------------------------------------------
/src/zraft_dict_backend.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_dict_backend).
-author("dreyk").

-behaviour(zraft_backend).

-export([
    init/1,
    query/2,
    apply_data/2,
    apply_data/3,
    snapshot/1,
    snapshot_done/1,
    snapshot_failed/2,
    install_snapshot/2,
    expire_session/2]).

%% Value written through a session: v is the value, s the owning session.
-record(session,{v,s}).

%% @doc Initialise the backend FSM with an empty dict.
init(_PeerId) ->
    State = dict:new(),
    {ok, State}.

%% @doc Read from the FSM.
%% A function query is applied to the whole dict and its result returned.
%% Any other term is looked up as a key; the key is reported back together
%% with either not_found or {ok,Value}.
query(Fn, Dict) when is_function(Fn) ->
    {ok, Fn(Dict)};
query(Key, Dict) ->
    Reply = case dict:find(Key, Dict) of
        error ->
            not_found;
        {ok, #session{v = Value}} ->
            %% Unwrap a value stored through a session.
            {ok, Value};
        Found ->
            Found
    end,
    {ok, [Key], Reply}.

%% @doc Write data to the FSM: store Value under Key.
apply_data({Key, Value}, Dict) ->
    {ok, [Key], dict:store(Key, Value, Dict)}.

%% @doc Write data to the FSM, remembering the session that wrote it.
apply_data({Key, Value}, Session, Dict) ->
    Entry = #session{v = Value, s = Session},
    {ok, [Key], dict:store(Key, Entry, Dict)}.

%% @doc Drop every entry written by Session.
%% Returns the removed keys and a dict rebuilt from the remaining entries.
expire_session(Session, Dict) ->
    Split = fun(Key, Value, {Expired, Kept}) ->
        case Value of
            #session{s = Owner} when Owner =:= Session ->
                {[Key | Expired], Kept};
            _ ->
                {Expired, [{Key, Value} | Kept]}
        end
    end,
    {Triggers, Remaining} = dict:fold(Split, {[], []}, Dict),
    {ok, Triggers, dict:from_list(Remaining)}.

%% @doc Prepare the FSM to take a snapshot.
%% Returns an async function that writes every dict entry to the file "state"
%% inside the given directory. Each entry is stored as a zero byte, a 64-bit
%% size and the entry encoded with term_to_binary/1.
snapshot(Dict) ->
    Fun = fun(ToDir) ->
        File = filename:join(ToDir, "state"),
        {ok, FD} = file:open(File, [write, raw, binary]),
        lists:foreach(fun(E) ->
            V1 = term_to_binary(E),
            Size = byte_size(V1),
            Row = <<0:8, Size:64, V1/binary>>,
            %% Fail loudly instead of reporting a truncated snapshot as ok.
            ok = file:write(FD, Row)
        end, dict:to_list(Dict)),
        ok = file:close(FD),
        ok
    end,
    {async, Fun, Dict}.

%% @doc Notify that the snapshot has been done.
snapshot_done(Dict) ->
    {ok, Dict}.

%% @doc Notify that the snapshot has failed.
snapshot_failed(_Reason, Dict) ->
    {ok, Dict}.

%% @doc Read data from the snapshot directory.
%% Returns {ok,Dict} built from the file "state" in Dir, {error,Reason} when
%% that file cannot be opened, or {error,file_corrupted} when its contents
%% cannot be decoded. The previous state is ignored.
install_snapshot(Dir, _) ->
    File = filename:join(Dir, "state"),
    case file:open(File, [read, raw, binary]) of
        {ok, FD} ->
            try read(FD, []) of
                {ok, Data} ->
                    {ok, dict:from_list(Data)};
                Else ->
                    Else
            after
                file:close(FD)
            end;
        {error, _} = Error ->
            Error
    end.

%% @private
%% Read rows written by snapshot/1 until end of file.
read(FD, Acc) ->
    case file:read(FD, 9) of
        {ok, <<0:8, Size:64>>} ->
            case file:read(FD, Size) of
                {ok, B} when byte_size(B) =:= Size ->
                    %% try/catch rather than `catch', so that an entry whose
                    %% key is the atom 'EXIT' is not mistaken for a failure.
                    try binary_to_term(B) of
                        {K, V} ->
                            read(FD, [{K, V} | Acc]);
                        _ ->
                            {error, file_corrupted}
                    catch
                        _:_ ->
                            {error, file_corrupted}
                    end;
                _ ->
                    {error, file_corrupted}
            end;
        eof ->
            {ok, Acc};
        _ ->
            {error, file_corrupted}
    end.
--------------------------------------------------------------------------------
/src/zraft_lib.app.src:
--------------------------------------------------------------------------------
{application, zraft_lib,
 [
  {description, "Erlang RAFT protocol implementation"},
  {vsn, "0.5.0"},
  {registered, []},
  {applications, [
                  kernel,
                  stdlib,
                  lager
                 ]},
  {mod, {zraft_lib_app, []}},
  {env, []}
 ]}.
--------------------------------------------------------------------------------
/src/zraft_lib_app.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_lib_app).
-author("dreyk").

-behaviour(application).

-export([
    start/0,
    start/2,
    stop/1
]).

%% Start from a test console: the application is started from a separate
%% process, and that process fails unless zraft_util:start_app/1 returns ok.
start() ->
    Boot = fun() ->
        ok = zraft_util:start_app(zraft_lib)
    end,
    spawn(Boot).

%% application callback: start the top-level supervisor.
start(_Type, _Args) ->
    zraft_lib_sup:start_link().

%% application callback: nothing to clean up.
stop(_AppState) ->
    ok.

--------------------------------------------------------------------------------
/src/zraft_lib_sup.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_lib_sup).
-author("dreyk").

-behaviour(supervisor).

-export([start_link/0]).

-export([init/1,start_consensus/2,start_consensus/1]).

-include("zraft.hrl").

%% Start the supervisor, registered locally under the module name.
-spec(start_link() ->
    {ok, Pid :: pid()} | ignore | {error, Reason :: term()}).
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%% supervisor callback. Children are the peers found by read_peers/0. At most
%% two restarts are allowed within a period derived from the election
%% timeout: four times its value divided by 1000, rounded, and at least 1.
-spec(init(Args :: term()) ->
    {ok, {SupFlags :: {RestartStrategy :: supervisor:strategy(),
        MaxR :: non_neg_integer(), MaxT :: non_neg_integer()},
        [ChildSpec :: supervisor:child_spec()]
    }}).
init([]) ->
    Timeout = max(1,round(zraft_consensus:get_election_timeout()*4/1000)),
    SupFlags = {one_for_one,2,Timeout},
    Peers = read_peers(),
    {ok, {SupFlags, Peers}}.

%% Start a consensus peer with an explicit backend module.
%% A child that already exists is reported as {error,already_created}; any
%% other supervisor:start_child/2 result is returned unchanged.
-spec start_consensus(zraft_consensus:peer_id(),module()) ->
    supervisor:startchild_ret() | {error, already_created}.
start_consensus(PeerID,BackEnd)->
    Spec = consensus_spec([PeerID,BackEnd]),
    start_result(supervisor:start_child(?MODULE, Spec)).

%% Start a consensus peer, passing only the peer id to zraft_consensus.
%% Results are reported as for start_consensus/2.
-spec start_consensus(zraft_consensus:peer_id()) ->
    supervisor:startchild_ret() | {error, already_created}.
start_consensus(PeerID)->
    Spec = consensus_spec([PeerID]),
    start_result(supervisor:start_child(?MODULE, Spec)).

%% @private
%% Report a child that already exists (running or merely known to the
%% supervisor) as already_created; pass every other result through.
start_result(Reply) ->
    case Reply of
        {error, {already_started, _}} ->
            {error, already_created};
        {error, already_present} ->
            {error, already_created};
        _ ->
            Reply
    end.

%% @private
%% Child spec for a consensus peer; the child id is the peer name and Args
%% are handed to zraft_consensus:start_link as they are.
consensus_spec([{PeerName,_}|_]=Args) ->
    StartFunc = {zraft_consensus, start_link, Args},
    {PeerName, StartFunc, permanent, 5000, worker, [zraft_consensus]}.

%%@private
%% Build child specs for every peer found under the configured log_dir.
read_peers() ->
    Root = zraft_util:get_env(log_dir, "data"),
    case file:list_dir(Root) of
        {ok, Entries} ->
            read_peers(Root, Entries, []);
        _ ->
            []
    end.

%% Entries without readable peer metadata are logged and skipped.
read_peers(_Root, [], Specs) ->
    Specs;
read_peers(Root, [Entry | Rest], Specs) ->
    PeerDir = filename:join(Root, Entry),
    case zraft_fs_log:load_raft_meta(PeerDir) of
        {ok, #raft_meta{id = PeerID, back_end = Module}} ->
            read_peers(Root, Rest, [consensus_spec([PeerID, Module]) | Specs]);
        _ ->
            lager:warning("~p does't contain peer meta",[PeerDir]),
            read_peers(Root, Rest, Specs)
    end.

--------------------------------------------------------------------------------
/src/zraft_log_util.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.
See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_log_util).
-author("dreyk").

-include("zraft.hrl").

-export([
    append_request/6
]).

%% Build an #append_entries{} request. The advertised commit index never
%% exceeds the index of the last entry carried by this request.
append_request(Epoch, Term, LeaderCommit, PrevIndex, PrevTerm, Entries) ->
    LastSentIndex = PrevIndex + length(Entries),
    #append_entries{
        epoch = Epoch,
        term = Term,
        entries = Entries,
        prev_log_index = PrevIndex,
        prev_log_term = PrevTerm,
        commit_index = erlang:min(LeaderCommit, LastSentIndex)}.

--------------------------------------------------------------------------------
/src/zraft_peer_proxy.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_peer_proxy).
-author("dreyk").

-include("zraft.hrl").

-define(INFO(State,S, As),?MINFO("~p: "++S,[print_id(State)|As])).
-define(INFO(State,S), ?MINFO("~p: "++S,[print_id(State)])).
%% Logging helpers: every message is prefixed with print_id(State).
-define(ERROR(State,S, As),?MERROR("~p: "++S,[print_id(State)|As])).
-define(ERROR(State,S), ?MERROR("~p: "++S,[print_id(State)])).
-define(DEBUG(State,S, As),?MDEBUG("~p: "++S,[print_id(State)|As])).
-define(DEBUG(State,S), ?MDEBUG("~p: "++S,[print_id(State)])).
-define(WARNING(State,S, As),?MWARNING("~p: "++S,[print_id(State)|As])).
-define(WARNING(State,S), ?MWARNING("~p: "++S,[print_id(State)])).

-behaviour(gen_server).

-export([start_link/4]).

-export([init/1,
    handle_call/3,
    handle_cast/2,
    handle_info/2,
    terminate/2,
    code_change/3,
    value/3,
    stop/1,
    cmd/2,
    stat/2,
    stop_sync/1,
    lists_flatten/1]).

%% Request timeout: taken from the request_timeout env value, defaulting to
%% twice the election timeout.
-define(REQUEST_TIMEOUT,zraft_util:get_env(request_timeout,zraft_consensus:get_election_timeout()*2)).

%% Book-keeping for a snapshot that is being pushed to the remote peer:
%% the snapshot directory, the copying process with its monitor reference,
%% and the snapshot index.
-record(snapshot_progress, {snapshot_dir, process, mref, index}).
%% Proxy state.
%%   peer             - #peer{} record describing the remote peer
%%   remote_peer_id   - address taken from the last peer_up message / reply
%%   raft             - owning consensus process address
%%   quorum_counter   - process that receives peer changes (zraft_quorum_counter:sync/2)
%%   request_timer    - timer that casts request_timeout for the request in flight
%%   hearbeat_timer   - timer that casts hearbeat_timeout
%%   request_ref      - reference of the request in flight, undefined when idle
%%   request_time     - os:timestamp() taken when the last request was sent
%%   force_hearbeat   - next request must be a heartbeat (entries = false)
%%   force_request    - send a new request as soon as the current one completes
%%   append_buffer    - #append_entries{} accumulated while a request is in flight
%%   backoff          - backoff timer, undefined when not backing off
%%   backoff_timeout  - backoff multiplier applied to the election timeout
-record(state, {
    peer,
    remote_peer_id,
    raft,
    quorum_counter,
    request_timer,
    hearbeat_timer,
    request_ref,
    request_time,
    force_hearbeat = false,
    force_request = false,
    append_buffer,
    current_term = 0,
    current_epoch = 0,
    back_end, request_timeout, snapshot_progres,backoff,backoff_timeout}).


%% Asynchronously asks the proxy for its statistics; the answer is delivered
%% to From through reply/2 as {PeerID, #proxy_peer_stat{}}.
stat(Peer,From)->
    gen_server:cast(Peer,{stat,From}).

%% Asynchronously asks the proxy for element GetIndex of its #peer{} record.
value(Peer, From, GetIndex) ->
    gen_server:cast(Peer, {get, From, GetIndex}).

%% Asynchronous stop.
stop(Peer) ->
    gen_server:cast(Peer, stop).

%% Synchronous stop; returns ok once the proxy has accepted the request.
stop_sync(Peer) ->
    gen_server:call(Peer, stop).

%% Sends an arbitrary command to the proxy as a cast.
cmd(Peer, Cmd) ->
    gen_server:cast(Peer, Cmd).

%% Starts a proxy owned by Raft that represents remote peer PeerID.
start_link(Raft,QuorumCounter,PeerID, BackEnd) ->
    gen_server:start_link(?MODULE, [Raft,QuorumCounter,PeerID, BackEnd], []).

%% gen_server init callback. Schedules start_peer, announces itself to the
%% remote peer (unless the proxy represents the local raft itself) and
%% builds the initial state.
init([Raft,QuorumCounter,PeerID, BackEnd]) ->
    gen_server:cast(self(), start_peer),
    %% Broadcast an "up" message to prevent a restarted peer from becoming
    %% leader. ReqTimeout is double the election timeout.
    case Raft of
        {PeerID,_}->
            %% The proxy points at its own raft: nothing to announce.
            ok;
        _->
            zraft_peer_route:cmd(PeerID,{peer_up,Raft})
    end,
    ReqTimeout = ?REQUEST_TIMEOUT,
    {ok, #state{
        raft = Raft,
        quorum_counter = QuorumCounter,
        back_end = BackEnd,
        peer = #peer{id = PeerID},
        request_timeout = ReqTimeout
    }}.

%% gen_server call callback.
%% force_hearbeat_timeout / force_request_timeout replace a pending timer with
%% one that fires (almost) immediately and answer no_timer when no such timer
%% is pending; both are used by the eunit scenario in this module.
handle_call(force_hearbeat_timeout, _, State = #state{hearbeat_timer = Timer}) ->
    if
        Timer == undefined ->
            {reply, no_timer, State};
        true ->
            _ = zraft_util:gen_server_cancel_timer(Timer),
            Timer1 = zraft_util:gen_server_cast_after(0, hearbeat_timeout),
            {reply, ok, State#state{hearbeat_timer = Timer1}}
    end;
handle_call(force_request_timeout, _, State = #state{request_timer = Timer}) ->
    if
        Timer == undefined ->
            {reply, no_timer, State};
        true ->
            _ = zraft_util:gen_server_cancel_timer(Timer),
            Timer1 = zraft_util:gen_server_cast_after(1, request_timeout),
            {reply, ok, State#state{request_timer = Timer1}}
    end;
handle_call(stop, _From, State) ->
    {stop, normal, ok, State};
handle_call(_Request, _From, State) ->
    {reply, ok, State}.

%% gen_server cast callback: the proxy state machine. Clause order matters -
%% several clauses overlap and rely on guards/earlier clauses having filtered
%% out the special cases (backoff, matching request_ref, newer term).
handle_cast(stop, State) ->
    {stop, normal, State};

handle_cast(start_peer, State) ->
    %% Nothing to do here; the message is scheduled by init/1.
    {noreply, State};

handle_cast({peer_up,From},State=#state{request_ref = Ref})->
    %% Needed to prevent a restarted peer from trying to become leader.
    %% ReqTimeout is double the election timeout.
    if
        State#state.remote_peer_id==From->
            %% Same remote instance as before: ignore.
            {noreply,State};
        Ref == undefined->
            %% The remote peer has been restarted.
            %% No active request: just remember the new address.
            State1 = cancel_backoff(State),
            {noreply,State1#state{remote_peer_id = From}};
        true->
            %% The remote peer has been restarted while a request was in
            %% flight: send new requests.
            State1 = cancel_backoff(State),
            progress(State1#state{remote_peer_id = From})
    end;
handle_cast(?LOST_LEADERSHIP_CMD,
    State = #state{peer = Peer}) ->
    %% Drop every timer, snapshot transfer, backoff and buffered entry and
    %% forget the vote/epoch/term learned while leading.
    State1 = reset_timers(true, State),
    State2 = reset_snapshot(State1),
    State3 = cancel_backoff(State2),
    State4 = State3#state{append_buffer = undefined,peer = Peer#peer{has_vote = false, epoch = 0}, current_term = 0},
    {noreply, State4};

handle_cast(backoff_timeout, State = #state{backoff = Ref}) when Ref /= undefined ->
    %% Backoff period elapsed: probe the peer with a heartbeat.
    %% Note that backoff_timeout (the multiplier) is left untouched here.
    ?INFO(State,"wake up backoff"),
    progress(State#state{backoff = undefined,force_hearbeat = true});
handle_cast(hearbeat_timeout, State = #state{request_ref = Ref}) when Ref /= undefined ->
    %% A request is already in flight; its reply will drive the next step.
    {noreply, State};
handle_cast(hearbeat_timeout, State) ->%% send a new heartbeat
    case State#state.snapshot_progres of
        undefined ->
            State1 = start_replication(State),
            {noreply, State1};
        _ ->
            %% A snapshot transfer is in progress: send a snapshot heartbeat
            %% to check/update the peer state.
            State2 = install_snapshot_hearbeat(hearbeat, State),
            {noreply, State2}
    end;
handle_cast(request_timeout, State = #state{request_ref = undefined}) ->
    ?WARNING(State,"There is't active request"),
    {noreply, State};
handle_cast(request_timeout, State) ->%% the request in flight timed out
    State1 = backoff(State),
    {noreply, State1};

handle_cast({?BECOME_LEADER_CMD, HearBeat},
    State = #state{peer = Peer}) ->%% our raft has been elected
    #append_entries{term = CurrentTerm, epoch = Epoch, prev_log_index = LastLogIndex} = HearBeat,
    State1 = reset_timers(true, State),%% discard all active requests
    State2 = reset_snapshot(State1),%% stop copying the snapshot
    State3 = cancel_backoff(State2),
    State4 = State3#state{
        force_hearbeat = true,%% We must match the follower's log before starting replication
        current_term = CurrentTerm,
        current_epoch = Epoch,
        peer = Peer#peer{last_agree_index = 0, next_index = LastLogIndex + 1},
        append_buffer = undefined
    },
    State5 = replicate(HearBeat, State4),
    {noreply, State5};

handle_cast({?OPTIMISTIC_REPLICATE_CMD, _Req},
    State = #state{backoff_timeout = T}) when T /= undefined ->
    %% While a backoff multiplier is set optimistic requests are dropped.
    {noreply,State};
handle_cast({?OPTIMISTIC_REPLICATE_CMD, Req},
    State = #state{request_ref = Ref}) when Ref /= undefined ->
    %% The previous request has not finished yet: buffer the entries (unless
    %% a snapshot is being copied) and ask for a new request afterwards.
    #append_entries{term = Term, epoch = Epoch} = Req,
    State1 = case State#state.snapshot_progres of
                 undefined->
                     Req1 = add_append(State#state.append_buffer,Req),
                     State#state{append_buffer = Req1};
                 _->
                     State
             end,
    {noreply, State1#state{force_request = true, current_term = Term, current_epoch = Epoch}};

handle_cast({?OPTIMISTIC_REPLICATE_CMD, Req},
    State = #state{snapshot_progres = undefined}) ->
    %% Idle and no snapshot transfer: send the entries right away.
    #append_entries{term = Term, epoch = Epoch} = Req,
    State1 = State#state{current_term = Term, current_epoch = Epoch},
    State2 = replicate(Req, State1),
    {noreply, State2};

handle_cast({?OPTIMISTIC_REPLICATE_CMD, Req}, State) ->%% a snapshot is being copied
    #append_entries{term = Term, epoch = Epoch} = Req,
    State1 = State#state{current_term = Term, current_epoch = Epoch},
    %% Just send a heartbeat
    State2 = install_snapshot_hearbeat(hearbeat, State1),
    {noreply, State2};

handle_cast(#append_reply{},State = #state{backoff = Ref}) when Ref /= undefined ->
    %% Any append reply received while the backoff timer is pending cancels
    %% the backoff and forces a heartbeat.
    State1 = cancel_backoff(State),
    ?INFO(State,"Backof repler force hearbeat"),
    progress(State1#state{force_hearbeat = true});
handle_cast(#append_reply{from_peer = From,epoch = Epoch, success = true, agree_index = Index, request_ref = RF},
    State = #state{force_request = FR, request_ref = RF}) ->
    %% Successful reply to the request in flight (request_ref matches).
    %% NOTE(review): backoff_timeout is not cleared on this path; after a
    %% backoff wake-up it appears to stay set, so ?OPTIMISTIC_REPLICATE_CMD
    %% keeps being dropped until cancel_backoff/1 runs elsewhere - confirm
    %% whether that is intended.
    State1 = update_peer(Index, Index + 1, Epoch,From,State),
    State2 = reset_timers(true, State1),
    State3 = if
                 FR ->
                     %% We have new entries to replicate
                     start_replication(State2);
                 true ->
                     start_hearbeat_timer(State2)
             end,
    {noreply, State3};

handle_cast(#append_reply{term = PeerTerm},
    State = #state{current_term = CurrentTerm,raft = Raft}) when PeerTerm > CurrentTerm ->
    %% CurrentTerm may actually be out of date by now, but that is not a
    %% problem: we will receive a new term or be shut down soon.
    ?WARNING(State,"Peer has new term(leader)"),
    zraft_consensus:maybe_step_down(Raft, PeerTerm),
    State1 = reset_timers(true, State),
    {noreply, State1#state{append_buffer = undefined}};

handle_cast(#append_reply{from_peer = From,request_ref = RF, last_index = LastIndex, epoch = Epoch},
    State = #state{peer = Peer, request_ref = RF}) ->
    %% Unsuccessful reply to the request in flight: step next_index back
    %% (never below 1, never beyond the peer's last index) and retry.
    ?WARNING(State,"Peer out of date"),
    DecNext = Peer#peer.next_index - 1,
    NextIndex = max(1, min(LastIndex, DecNext)),
    State1 = update_peer(NextIndex, Epoch,From, State),
    progress(State1#state{append_buffer = undefined});

handle_cast(#append_reply{}, State) ->%% out of date response
    {noreply, State};


handle_cast(Req = #install_snapshot{request_ref = RF, term = Term, epoch = Epoch},
    State = #state{request_ref = RF}) ->
    %% Answer to the request in flight saying a snapshot must be installed.
    State1 = reset_timers(false, State),
    ?INFO(State,"Need Install snaphsot"),
    %% Try to start the snapshot copy process
    State2 = install_snapshot(Req, State1#state{current_epoch = Epoch, current_term = Term,append_buffer = undefined}),
    {noreply, State2};

handle_cast(#install_snapshot{}, State) ->%% out of date response
    %% close all opened files
    %% NOTE(review): the comment above is original; no files are closed by
    %% this clause - confirm whether cleanup is expected here.
    {noreply, State};

handle_cast(Resp = #install_snapshot_reply{from_peer = From,result = start, request_ref = RF, epoch = Epoch},
    State = #state{request_ref = RF, force_request = FR}) ->
    %% The peer accepted the snapshot transfer: start copying.
    State1 = update_peer(Epoch,From, State),
    State2 = reset_timers(true, State1),
    State3 = start_copy_snapshot(Resp, State2),
    State4 = if
                 FR ->
                     install_snapshot_hearbeat(hearbeat, State3);
                 true ->
                     start_hearbeat_timer(State3)
             end,
    {noreply, State4};
handle_cast(#install_snapshot_reply{from_peer = From,result = hearbeat, request_ref = RF, epoch = Epoch},
    State = #state{request_ref = RF, force_request = FR}) ->
    %% Reply to a snapshot heartbeat.
    State1 = update_peer(Epoch,From, State),
    State2 = reset_timers(true, State1),
    State3 = if
                 FR ->
                     install_snapshot_hearbeat(hearbeat, State2);
                 true ->
                     start_hearbeat_timer(State2)
             end,
    {noreply, State3};
handle_cast(#install_snapshot_reply{index = Index,from_peer = From, result = finish, request_ref = RF, epoch = Epoch},
    State = #state{request_ref = RF}) ->
    %% The peer installed the snapshot: it now agrees up to Index.
    State1 = update_peer(Index, Index + 1, Epoch,From, State),
    State2 = reset_snapshot(State1),
    progress(State2#state{force_request = true});

%% Snapshot RPC failed
handle_cast(#install_snapshot_reply{term = PeerTerm},
    State = #state{current_term = CurrentTerm,raft = Raft}) when PeerTerm > CurrentTerm ->
    %% CurrentTerm may actually be out of date by now, but that is not a
    %% problem: we will receive a new term or be shut down soon.
    ?WARNING(State,"Peer has new term(leader)"),
    zraft_consensus:maybe_step_down(Raft, PeerTerm),
    State1 = reset_timers(true, State),
    State2 = reset_snapshot(State1),
    {noreply, State2};
handle_cast(#install_snapshot_reply{from_peer = From,request_ref = RF, epoch = Epoch},
    State = #state{request_ref = RF}) ->
    %% Any other result for the request in flight is treated as a failure.
    ?WARNING(State,"Copy snapshot failed"),
    State1 = update_peer(Epoch,From, State),
    State2 = reset_snapshot(State1),
    progress(State2#state{force_request = true});

handle_cast(#install_snapshot_reply{}, State) ->%% out of date response
    {noreply, State};

handle_cast({?UPDATE_CMD, Fun}, State = #state{peer = Peer,quorum_counter = C}) ->
    %% Apply Fun to the #peer{} record and publish the result to the quorum counter.
    Peer1 = Fun(Peer),
    zraft_quorum_counter:sync(C,Peer1),
    {noreply, State#state{peer = Peer1}};
handle_cast({get, From, GetIndex}, State = #state{peer = Peer}) ->
    reply(From, erlang:element(GetIndex, Peer)),
    {noreply, State};
handle_cast({stat, From}, State = #state{peer = Peer,snapshot_progres = Progress}) ->
    IsSnapshoting = (Progress /= undefined),
    Stat = #proxy_peer_stat{peer_state = Peer,is_snapshoting = IsSnapshoting},
    reply(From, {Peer#peer.id,Stat}),
    {noreply, State};
handle_cast(_Request, State) ->
    {noreply, State}.

%% gen_server info callback.
%% Timer messages are unwrapped and routed through handle_cast/2; a 'DOWN'
%% message for the monitored snapshot copier either finishes the transfer
%% (normal exit) or schedules a retry (any other reason).
handle_info({timeout, _TimerRef, {'$zraft_timeout', Event}}, State) ->
    handle_cast(Event, State);
handle_info({'DOWN', MRef, process, _Pid, ExitReason},
    State = #state{snapshot_progres = #snapshot_progress{mref = MRef}}) ->
    case ExitReason of
        normal ->
            ?INFO(State,"Snapshot has transfered."),
            Idle = reset_timers(false, State),
            %% The "finish" request still needs the snapshot index, so the
            %% progress record is dropped only after the request is built.
            Sent = install_snapshot_hearbeat(finish, Idle),
            {noreply, Sent#state{
                snapshot_progres = undefined,
                force_request = true,
                append_buffer = undefined}};
        _ ->
            ?ERROR(State,"Snapshot transfer failed ~p",[ExitReason]),
            progress(State#state{
                force_request = true,
                snapshot_progres = undefined,
                append_buffer = undefined})
    end;
handle_info(_Other, State) ->
    {noreply, State}.

%% gen_server terminate callback: logs abnormal stops and kills the snapshot
%% copier process if one is recorded in the state.
terminate(Reason, State = #state{snapshot_progres = Progress}) ->
    case Reason == normal of
        true ->
            true;
        false ->
            ?WARNING(State,"Proxy is being stoped ~p",[Reason])
    end,
    case Progress of
        undefined ->
            ok;
        _ ->
            Copier = Progress#snapshot_progress.process,
            case is_pid(Copier) of
                true ->
                    exit(Copier, kill);
                false ->
                    ok
            end
    end.

%% gen_server code_change callback: the state is carried over unchanged.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% Starts a new replication round.
%% With an empty append buffer the owning raft is asked (replicate_log) to
%% fill in and send the entries following next_index - 1; `entries` carries a
%% boolean flag: false for a pure heartbeat, true otherwise. With buffered
%% entries the buffer is sent directly via replicate/2.
start_replication(State = #state{append_buffer = undefined}) ->
    #state{
        peer = #peer{next_index = Next, id = RemoteID},
        raft = Raft,
        force_hearbeat = HeartbeatOnly,
        request_timeout = Timeout
    } = State,
    Ref = erlang:make_ref(),
    Request = #append_entries{
        prev_log_index = Next - 1,
        request_ref = Ref,
        entries = not HeartbeatOnly,
        from = from_addr(State)
    },
    zraft_consensus:replicate_log(Raft, RemoteID, Request),
    Timer = zraft_util:gen_server_cast_after(Timeout, request_timeout),
    State#state{
        request_ref = Ref,
        request_timer = Timer,
        request_time = os:timestamp()};
start_replication(State) ->
    replicate(undefined, State).

%% Sends Req (merged with whatever is buffered) straight to the remote peer,
%% arms the request timer and clears the buffer. The commit index is capped
%% at the index of the last entry carried by the request.
replicate(Req, State) ->
    #state{peer = #peer{id = RemoteID}, request_timeout = Timeout, append_buffer = Buffered} = State,
    Merged = append_flatten(add_append(Buffered, Req)),
    Ref = erlang:make_ref(),
    #append_entries{commit_index = Commit, prev_log_index = Prev, entries = Entries} = Merged,
    Outgoing = Merged#append_entries{
        commit_index = min(Commit, Prev + length(Entries)),
        request_ref = Ref,
        from = from_addr(State)},
    zraft_peer_route:cmd(RemoteID, Outgoing),
    Timer = zraft_util:gen_server_cast_after(Timeout, request_timeout),
    State#state{
        request_ref = Ref,
        request_timer = Timer,
        request_time = os:timestamp(),
        append_buffer = undefined}.

%% Opens a snapshot transfer: remembers the snapshot directory/index taken
%% from Req, sends a "start" install_snapshot request to the remote peer and
%% arms the request timer.
install_snapshot(Req, State) ->
    #state{peer = #peer{id = RemoteID}, request_timeout = Timeout} = State,
    Progress = #snapshot_progress{
        snapshot_dir = Req#install_snapshot.data,
        index = Req#install_snapshot.index},
    Ref = erlang:make_ref(),
    StartReq = Req#install_snapshot{
        data = start,
        request_ref = Ref,
        from = from_addr(State)},
    zraft_peer_route:cmd(RemoteID, StartReq),
    Timer = zraft_util:gen_server_cast_after(Timeout, request_timeout),
    State#state{
        request_ref = Ref,
        request_timer = Timer,
        snapshot_progres = Progress,
        request_time = os:timestamp()}.

%% Sends an install_snapshot request of the given Type (the callers in this
%% module pass `hearbeat` or `finish`) for the snapshot currently in progress
%% and arms the request timer. Requires snapshot_progres to be set.
install_snapshot_hearbeat(Type, State) ->
    #state{
        peer = #peer{id = RemoteID},
        request_timeout = Timeout,
        snapshot_progres = #snapshot_progress{index = Index},
        current_term = Term,
        current_epoch = Epoch
    } = State,
    Ref = erlang:make_ref(),
    Request = #install_snapshot{
        data = Type,
        request_ref = Ref,
        from = from_addr(State),
        index = Index,
        term = Term,
        epoch = Epoch
    },
    zraft_peer_route:cmd(RemoteID, Request),
    Timer = zraft_util:gen_server_cast_after(Timeout, request_timeout),
    State#state{
        request_ref = Ref,
        request_timer = Timer,
        request_time = os:timestamp()}.

%% Leaves backoff mode: cancels the backoff timer (if any) and clears the
%% backoff multiplier. A state that is not backing off is returned as is.
cancel_backoff(State = #state{backoff = Timer, backoff_timeout = Factor}) ->
    case {Timer, Factor} of
        {undefined, undefined} ->
            State;
        _ ->
            _ = cancel_timer(Timer),
            State#state{backoff = undefined, backoff_timeout = undefined}
    end.

%% Enters (or extends) backoff mode after a request timeout: all timers, any
%% snapshot transfer and the append buffer are dropped and a backoff timer of
%% election_timeout * multiplier is armed. The multiplier starts at 1 and
%% grows by one per consecutive backoff until it reaches 10.
backoff(State)->
    ?INFO(State,"To backoff"),
    NoTimers = reset_timers(true, State),
    NoSnapshot = reset_snapshot(NoTimers),%% a snapshot may be in the middle of being copied
    Clean = NoSnapshot#state{append_buffer = undefined},
    #state{backoff_timeout = Prev, backoff = OldTimer} = Clean,
    _ = cancel_timer(OldTimer),
    Factor = case Prev of
                 undefined ->
                     1;
                 _ when Prev > 9 ->
                     Prev;
                 _ ->
                     Prev + 1
             end,
    Delay = zraft_consensus:get_election_timeout() * Factor,
    NewTimer = zraft_util:gen_server_cast_after(Delay, backoff_timeout),
    Clean#state{backoff = NewTimer, backoff_timeout = Factor}.

%% Aborts the snapshot transfer in progress, if any: the copier process is
%% demonitored (flushing a pending 'DOWN') and killed, and both the progress
%% record and the append buffer are cleared.
reset_snapshot(#state{snapshot_progres = undefined} = State) ->
    State;
reset_snapshot(#state{snapshot_progres = #snapshot_progress{mref = MRef, process = Copier}} = State) ->
    ?INFO(State,"Reseting snapshot transfer"),
    case MRef of
        undefined ->
            %% Transfer was announced but the copier has not been spawned yet.
            ok;
        _ ->
            erlang:demonitor(MRef, [flush]),
            erlang:exit(Copier, kill)
    end,
    State#state{snapshot_progres = undefined, append_buffer = undefined}.

%% Cancels the request and heartbeat timers and forgets the request in
%% flight. When Result is true the force_hearbeat/force_request flags are
%% cleared as well; otherwise they are preserved.
reset_timers(Result, #state{request_timer = ReqTimer, hearbeat_timer = HbTimer} = State) ->
    _ = cancel_timer(ReqTimer),
    _ = cancel_timer(HbTimer),
    Cleared = State#state{
        request_ref = undefined,
        request_timer = undefined,
        hearbeat_timer = undefined},
    case Result of
        true ->
            Cleared#state{force_hearbeat = false, force_request = false};
        _ ->
            Cleared
    end.

%% Decides what to do next once the current request is over and returns the
%% gen_server {noreply, State} tuple:
%%   force_hearbeat - retry the heartbeat (and replicate right after it is
%%                    accepted, hence force_request is raised);
%%   force_request  - start a new replication round;
%%   otherwise      - just arm the heartbeat timer.
progress(State0 = #state{force_hearbeat = ForceHeartbeat, force_request = ForceRequest}) ->
    Idle = reset_timers(false, State0),
    Next = case {ForceHeartbeat, ForceRequest} of
               {true, _} ->
                   %% The last heartbeat failed: try again. Once it is
                   %% accepted the log must be replicated immediately.
                   start_replication(Idle#state{force_request = true});
               {_, true} ->
                   %% The previous heartbeat or replication failed.
                   start_replication(Idle);
               _ ->
                   start_hearbeat_timer(Idle)
           end,
    {noreply, Next}.

%% Arms the heartbeat timer. The delay is the election timeout minus the time
%% (in ms) elapsed since the last request was sent, never less than zero;
%% when no request time is recorded the full election timeout is used.
start_hearbeat_timer(State = #state{request_time = SentAt}) ->
    ElectionTimeout = zraft_consensus:get_election_timeout(),
    Delay = case SentAt of
                undefined ->
                    ElectionTimeout;
                _ ->
                    ElapsedMs = timer:now_diff(os:timestamp(), SentAt) div 1000,
                    if
                        ElapsedMs > ElectionTimeout ->
                            0;
                        true ->
                            ElectionTimeout - ElapsedMs
                    end
            end,
    Timer = zraft_util:gen_server_cast_after(Delay, hearbeat_timeout),
    State#state{hearbeat_timer = Timer, request_time = undefined}.

%% Delivers Msg to a {Ref, Pid} caller address as {Ref, Msg}; any other
%% address shape is silently ignored.
reply({Tag, Caller}, Msg) ->
    Caller ! {Tag, Msg};
reply(_NoAddress, _Msg) ->
    ok.

%% Cancels a timer created with zraft_util:gen_server_cast_after/2;
%% `undefined` (no timer) yields 0.
cancel_timer(undefined) ->
    0;
cancel_timer(Timer) ->
    zraft_util:gen_server_cancel_timer(Timer).


%% Spawns a monitored process that copies the snapshot files to the address
%% and port announced by the remote peer, and records its pid/monitor in the
%% snapshot progress. The copier always discards the file info; it exits
%% normally on `ok` and with the failure term otherwise.
start_copy_snapshot(#install_snapshot_reply{port = Port, addr = Addr},
    State = #state{snapshot_progres = Progress}) ->
    ?INFO(State,"Starting transfer snapshot via tcp ~p:~p",[Addr,Port]),
    Copier =
        fun() ->
            Files = zraft_snapshot_receiver:copy_info(Progress#snapshot_progress.snapshot_dir),
            Outcome = (catch zraft_snapshot_receiver:copy_files(print_id(State),Files, Addr, Port)),
            zraft_snapshot_receiver:discard_files_info(Files),
            case Outcome of
                ok ->
                    ok;
                Failure ->
                    exit(Failure)
            end
        end,
    {Pid, MRef} = spawn_monitor(Copier),
    State#state{snapshot_progres = Progress#snapshot_progress{mref = MRef, process = Pid}}.

%% update_peer/3: records the remote address and, when the epoch changed,
%% stores the new epoch and publishes the peer to the quorum counter.
update_peer(Epoch, From, State = #state{peer = Peer}) ->
    case Peer#peer.epoch == Epoch of
        true ->
            State#state{remote_peer_id = From};
        false ->
            change_peer(From, Peer#peer{epoch = Epoch}, State)
    end.
%% update_peer/4: as update_peer/3 but also moves next_index. The quorum
%% counter is notified only when the epoch changed.
update_peer(NextIndex, Epoch, From, State = #state{peer = Peer}) ->
    case Peer#peer.epoch == Epoch of
        true ->
            State#state{
                remote_peer_id = From,
                peer = Peer#peer{next_index = NextIndex}};
        false ->
            change_peer(From, Peer#peer{epoch = Epoch, next_index = NextIndex}, State)
    end.
%% update_peer/5: as update_peer/4 but also tracks last_agree_index. The
%% quorum counter is notified when either the agree index or the epoch changed.
update_peer(AgreeIndex, NextIndex, Epoch, From, State = #state{peer = Peer}) ->
    #peer{last_agree_index = OldAgree, epoch = OldEpoch} = Peer,
    Unchanged = OldAgree == AgreeIndex andalso OldEpoch == Epoch,
    case Unchanged of
        true ->
            State#state{
                remote_peer_id = From,
                peer = Peer#peer{next_index = NextIndex}};
        false ->
            Updated = Peer#peer{
                epoch = Epoch,
                next_index = NextIndex,
                last_agree_index = AgreeIndex},
            change_peer(From, Updated, State)
    end.

%% Publishes the peer record to the quorum counter and stores it, together
%% with the remote address, in the state.
change_peer(From, Peer, State = #state{quorum_counter = Counter}) ->
    zraft_quorum_counter:sync(Counter, Peer),
    State#state{peer = Peer, remote_peer_id = From}.

%% Identifier used as log prefix: {OwnPeerID,'->',RemotePeerID}.
print_id(#state{raft = Raft, peer = #peer{id = RemoteID}}) ->
    {zraft_util:peer_id(Raft), '->', RemoteID}.

%% Address put into the `from` field of outgoing requests:
%% {OwnPeerID, ProxyPid}.
from_addr(#state{raft = Raft}) ->
    {zraft_util:peer_id(Raft), self()}.

%% Merges an incoming append request into the buffered one.
%% The buffer keeps `entries` as a list of batches, newest first; epoch and
%% commit index always follow the newest request, every other field stays as
%% it was in the first buffered request. Clause order matters: a missing
%% incoming request leaves the buffer (even an undefined one) untouched.
add_append(Buffered, undefined) ->
    Buffered;
add_append(undefined, Incoming = #append_entries{entries = Batch}) ->
    Incoming#append_entries{entries = [Batch]};
add_append(Buffered = #append_entries{entries = Batches},
    #append_entries{entries = Batch, epoch = Epoch, commit_index = Commit}) ->
    Buffered#append_entries{
        entries = [Batch | Batches],
        epoch = Epoch,
        commit_index = Commit}.

%% Turns the buffered list of batches into one flat, oldest-first entry list.
append_flatten(Req = #append_entries{entries = Batches}) ->
    Req#append_entries{entries = lists_flatten(Batches)}.

%% One-level flatten of a newest-first list of lists: every batch keeps its
%% internal order and earlier list elements end up after later ones, e.g.
%% [[3,4],[1,2]] -> [1,2,3,4].
lists_flatten(L) ->
    lists:foldl(fun(Batch, Acc) -> Batch ++ Acc end, [], L).

-ifdef(TEST).
%% eunit fixture: mocks the collaborators of the proxy.
setup_peer() ->
    meck:new(zraft_peer_route, [passthrough]),
    meck:new(zraft_consensus),
    meck:new(zraft_snapshot_receiver,[passthrough]),
    meck:new(zraft_quorum_counter),
    meck:expect(zraft_consensus, get_election_timeout, fun() -> 1000 end),
    meck:expect(zraft_peer_route, start_peer,
        fun(PeerToStart, BackEnd) ->
            ?debugFmt("Starting ~p:~s", [PeerToStart, BackEnd]),
            ok
        end),
    meck:expect(zraft_snapshot_receiver,copy_info,fun(_)-> [] end),
    meck:expect(zraft_snapshot_receiver,copy_files,
        fun(A1,A2,A3,A4)->
            ?debugFmt("copy snapshot: ~p",[{A1,A2,A3,A4}])
        end),
    ok.
%% eunit fixture teardown: unloads every mock created by setup_peer/0.
stop_peer(_) ->
    Mocked = [zraft_peer_route, zraft_quorum_counter, zraft_consensus, zraft_snapshot_receiver],
    lists:foreach(fun(Mod) -> meck:unload(Mod) end, Mocked),
    ok.

%% eunit generator wiring the fixture to the communication scenario.
proxy_test_() ->
    Tests = fun(_X) -> [commands()] end,
    {setup, fun setup_peer/0, fun stop_peer/1, Tests}.

%% eunit scenario driving a proxy through election, log matching,
%% replication and snapshot installation. Every mocked collaborator forwards
%% what it receives to the test process, so wait_request/0 observes the
%% proxy's outgoing traffic in order:
%%   {replicate_log, Req} - request handed to zraft_consensus:replicate_log/3
%%   {cmd, Req}           - request sent through zraft_peer_route:cmd/2
%%   #peer{}              - peer state published to the quorum counter
commands() ->
    {"test communication", fun() ->
        Me = self(),
        Raft = {{test, node()}, Me},
        PeerID = {test1, node()},
        {ok, Proxy} = start_link(Raft,Me, PeerID, zraft_dict_backend),
        meck:expect(zraft_consensus, replicate_log,
            fun(_, _, Req) ->
                Me ! {replicate_log, Req}
            end),
        meck:expect(zraft_quorum_counter, sync,
            fun(_,P) ->
                Me ! P
            end),
        meck:expect(zraft_peer_route, cmd, fun(_, Req) ->
            Me ! {cmd, Req}
        end),
        %% Become leader: the proxy must forward the initial heartbeat.
        cmd(Proxy, {?BECOME_LEADER_CMD,
            #append_entries{
                commit_index = 0,
                entries = [],
                epoch = 3,
                prev_log_index = 5,
                prev_log_term = 5,
                term = 5}}),
        R1 = gen_server:call(Proxy, force_hearbeat_timeout),
        ?assertEqual(no_timer, R1),
        R2 = wait_request(),
        ?assertMatch(
            {cmd, #append_entries{entries = [], prev_log_term = 5, prev_log_index = 5, epoch = 3, term = 5, commit_index = 0}},
            R2
        ),
        %% Time the request out; the proxy is expected to come back with a
        %% heartbeat-only request (entries = false).
        R3 = gen_server:call(Proxy, force_request_timeout),
        ?assertEqual(ok, R3),
        R4 = wait_request(),
        ?assertMatch(
            {replicate_log, #append_entries{entries = false, prev_log_index = 5, prev_log_term = 0, term = 0, epoch = 0}},
            R4
        ),
        %% Rejected heartbeat: next_index steps back by one.
        fake_append_reply(Proxy, R4, #append_reply{term = 5, agree_index = 0, last_index = 7, success = false}),
        R6 = wait_request(),
        ?assertMatch(
            {replicate_log, #append_entries{entries = false, prev_log_index = 4, prev_log_term = 0, term = 0, epoch = 0}},
            R6
        ),
        fake_append_reply(Proxy, R6, #append_reply{term = 5, agree_index = 4, last_index = 4, success = true}),
        R7 = wait_request(),
        ?assertMatch(#peer{last_agree_index = 4}, R7),
        %% Start replication
        R8 = wait_request(),
        ?assertMatch(
            {replicate_log, #append_entries{entries = true, prev_log_index = 4, prev_log_term = 0, term = 0, epoch = 0}},
            R8
        ),
        fake_append_reply(Proxy, R8, #append_reply{term = 5, last_index = 4, success = false}),
        R10 = wait_request(),
        ?assertMatch(
            {replicate_log, #append_entries{entries = true, prev_log_index = 3, prev_log_term = 0, term = 0, epoch = 0}},
            R10
        ),
        fake_append_reply(Proxy, R10, #append_reply{term = 5, last_index = 5, agree_index = 5, success = true}),
        R11 = wait_request(),
        ?assertMatch(#peer{last_agree_index = 5}, R11),
        %% Idle now: only the heartbeat timer is pending.
        R12 = gen_server:call(Proxy, force_request_timeout),
        ?assertEqual(no_timer, R12),
        R13 = gen_server:call(Proxy, force_hearbeat_timeout),
        ?assertEqual(ok, R13),
        R14 = wait_request(),
        ?assertMatch(
            {replicate_log, #append_entries{entries = true, prev_log_index = 5, prev_log_term = 0, term = 0, epoch = 0}},
            R14
        ),
        fake_append_reply(Proxy, R14, #append_reply{term = 5, last_index = 5, agree_index = 5, success = true}),
        R16 = gen_server:call(Proxy, force_request_timeout),
        ?assertEqual(no_timer, R16),
        %% Optimistic replication request with a new term and epoch.
        cmd(Proxy, {?OPTIMISTIC_REPLICATE_CMD,
            #append_entries{
                commit_index = 0,
                entries = [1],
                epoch = 4,
                prev_log_index = 5,
                prev_log_term = 5,
                term = 6}}),
        R17 = wait_request(),
        ?assertMatch(
            {replicate_log,#append_entries{entries = true, prev_log_index = 5}},
            R17
        ),
        fake_append_reply(Proxy, R17, #append_reply{term = 6, last_index = 6, agree_index = 6, success = true}),
        R18 = wait_request(),
        ?assertMatch(#peer{last_agree_index = 6}, R18),
        S1 = sys:get_state(Proxy),
        ?assertMatch(
            #state{
                current_epoch = 4,
                current_term = 6,
                force_hearbeat = false,
                force_request = false,
                request_ref = undefined,
                request_timer = undefined,
                snapshot_progres = undefined,
                peer = #peer{epoch = 4, has_vote = false, id = PeerID, last_agree_index = 6, next_index = 7}
            },
            S1
        ),
        %% Snapshot installation: the consensus answers the next request
        %% with an #install_snapshot{} record.
        gen_server:call(Proxy, force_hearbeat_timeout),
        R19 = wait_request(),
        ?assertMatch(
            {replicate_log, #append_entries{entries = true, prev_log_index = 6}},
            R19
        ),
        fake_need_snapshot(R19),
        R20 = wait_request(),
        ?assertMatch(
            {
                cmd,
                #install_snapshot{index = 3, term = 6, epoch = 7, data = start}
            },
            R20
        ),
        fake_snapshot_reply(PeerID,R20,#install_snapshot_reply{result=start,index = 3,addr = 1,port = 1,term = 6}),
        R21 = wait_request(),
        ?assertMatch(#peer{epoch = 7}, R21),
        %% The mocked copy_files returns immediately, so the copier exits
        %% normally and the proxy sends the "finish" request.
        R22 = wait_request(),
        ?assertMatch(
            {
                cmd,
                #install_snapshot{index = 3, term = 6, epoch = 7, data = finish}
            },
            R22
        ),
        fake_snapshot_reply(PeerID,R22,#install_snapshot_reply{result=finish,index = 3,term = 6}),
        R23 = wait_request(),
        ?assertMatch(#peer{last_agree_index = 3}, R23),
        R24 = wait_request(),
        ?assertMatch(
            {replicate_log, #append_entries{entries = true, prev_log_index = 3, prev_log_term = 0, term = 0, epoch = 0}},
            R24
        ),
        ok = stop_sync(Proxy)
    end}.

%% Returns the next message in the test process mailbox, failing the test
%% when nothing arrives within two seconds.
wait_request() ->
    receive
        Req ->
            Req
    after 2000 ->
        ?assert(false)
    end.

%% Answers an observed append request on behalf of the remote peer, echoing
%% its request_ref and epoch.
%% NOTE(review): commands/0 passes the proxy pid as the first argument, so
%% from_peer becomes {ProxyPid, TestPid} rather than a peer id - confirm
%% this is intended.
fake_append_reply(PeerID, {_, #append_entries{request_ref = Ref, from = From, epoch = Epoch}}, Reply) ->
    zraft_peer_route:reply_proxy(From, Reply#append_reply{from_peer = {PeerID,self()}, request_ref = Ref, epoch = Epoch}).
%% Answers an observed append request with an #install_snapshot{} record
%% (index 3, term 6, epoch 7) that echoes the request_ref.
fake_need_snapshot({_, #append_entries{request_ref = Ref, from = From}}) ->
    Snapshot = #install_snapshot{from = From, request_ref = Ref, data = [], epoch = 7, index = 3, term = 6},
    zraft_peer_route:reply_proxy(From, Snapshot).
%% Answers an observed install_snapshot request on behalf of PeerID, echoing
%% its request_ref and epoch.
fake_snapshot_reply(PeerID, {_, #install_snapshot{request_ref = Ref, from = From, epoch = Epoch}}, Reply) ->
    Reply1 = Reply#install_snapshot_reply{
        from_peer = PeerID,
        epoch = Epoch,
        request_ref = Ref
    },
    zraft_peer_route:reply_proxy(From, Reply1).

-endif.
--------------------------------------------------------------------------------
/src/zraft_peer_route.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_peer_route).
-author("dreyk").

%% API
-export([cmd/2,reply_consensus/2,reply_proxy/2,start_peer/2]).

%% Sends Command to a peer.
%% A {Name,Node} peer (both atoms) receives it as a gen_fsm event; a
%% {Name,{Ref,Pid}} peer receives the plain message {Peer,Command}.
%% Always returns ok, as declared by the spec (the send operator on its own
%% would return the message that was sent).
-spec cmd(zraft_consensus:peer_id(),zraft_consensus:rpc_cmd())->ok.
cmd({Name,Node},Command) when is_atom(Name) andalso is_atom(Node)->
    gen_fsm:send_event({Name,Node},Command);
cmd({_Name,{_Ref,From}}=Peer,Command)->
    From ! {Peer,Command},
    ok.

%% Delivers Reply to a consensus process.
%% {_,Pid} addresses get a gen_fsm event; {_,{Ref,Pid}} addresses get the
%% plain message {Peer,Reply}. Always returns ok.
-spec reply_consensus(zraft_consensus:from_peer_addr(),term())->ok.
reply_consensus({_,Pid},Reply) when is_pid(Pid)->
    gen_fsm:send_event(Pid,Reply);
reply_consensus({_,{_Ref,Pid}}=Peer,Reply) when is_pid(Pid)->
    Pid ! {Peer,Reply},
    ok.

%% Delivers Reply to a peer proxy.
%% {_,Pid} addresses get a gen_server cast; {_,{Ref,Pid}} addresses get the
%% plain message {Peer,Reply}. Always returns ok.
-spec reply_proxy(zraft_consensus:from_peer_addr(),term())->ok.
reply_proxy({_,Pid},Reply) when is_pid(Pid)->
    gen_server:cast(Pid,Reply);
reply_proxy({_,{_Ref,Pid}}=Peer,Reply) when is_pid(Pid)->
    Pid ! {Peer,Reply},
    ok.

%% Asks node Node to start the consensus process for {Name,Node}.
%% The rpc runs in a freshly spawned process so the caller never blocks; the
%% outcome is only logged. Returns the pid of the spawned process, or ok
%% when the peer address does not carry a node name.
start_peer({Name,Node},BackEnd) when is_atom(Node)->
    spawn(fun()->
        Res = rpc:call(Node,zraft_lib_sup,start_consensus,[{Name,Node},BackEnd]),
        lager:info("Start remote ~p: ~p",[{Name,Node},Res])
    end);
start_peer(_,_)->
    ok.
--------------------------------------------------------------------------------
/src/zraft_quorum_counter.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_quorum_counter).
-author("dreyk").

-behaviour(gen_server).

%% API
-export([start_link/1]).


%% gen_server callbacks
-export([init/1,
    handle_call/3,
    handle_cast/2,
    handle_info/2,
    terminate/2,
    code_change/3]).

-export([
    set_conf/3,
    sync/2,
    set_state/2
]).

-include("zraft.hrl").

%% Server state.
%%   raft         - handle given to start_link/1; target of sync notifications
%%   conf_id      - id of the current configuration (0 for the blank one)
%%   old, new     - peer lists of the old/new configuration as {Id, #peer{}}
%%   conf_state   - configuration state as delivered by set_conf/3
%%   epoch_qourum, index_quorum, vote_quorum - last computed quorum values
%%   raft_state   - raft state name delivered by set_state/2
-record(state, {raft,conf_id,old,new,conf_state,epoch_qourum,index_quorum,vote_quorum,raft_state}).

%% Asynchronously install a new configuration.
set_conf(Counter,Conf,ConfState)->
    gen_server:cast(Counter,{set_conf,Conf,ConfState}).

%% Asynchronously push the latest state of a single peer.
sync(Counter,PeerState)->
    gen_server:cast(Counter,PeerState).

%% Asynchronously tell the counter which raft state the owner is in.
set_state(Counter,StateName)->
    gen_server:cast(Counter,{raft_state,StateName}).

%% Start a counter linked to the caller; `Raft' is kept in the state.
start_link(Raft) ->
    gen_server:start_link(?MODULE, [Raft], []).


%% Start as a follower with a blank, stable configuration.
init([Raft]) ->
    Initial = #state{
        raft = Raft,
        raft_state = follower,
        conf_id = 0,
        conf_state = ?STABLE_CONF,
        old = [],
        new = [],
        epoch_qourum = 0,
        index_quorum = 0,
        vote_quorum = false},
    {ok, Initial}.

%% No synchronous API: every call is answered with `ok'.
handle_call(_Request, _From, State) ->
    {reply, ok, State}.

%% Raft state change: only remembered, nothing is recomputed.
handle_cast({raft_state,NewRaftState},State)->
    {noreply,State#state{raft_state = NewRaftState}};
%% Blank configuration: forget all peers and reset the quorum values.
handle_cast({set_conf,?BLANK_CONF,ConfState},State)->
    {noreply,State#state{
        old = [],
        new = [],
        conf_id = 0,
        conf_state = ConfState,
        epoch_qourum = 0,
        index_quorum = 0,
        vote_quorum = true
    }};
%% Regular configuration: merge the peer lists and recompute the quorums.
handle_cast({set_conf,{ConfID,#pconf{new_peers = New,old_peers = Old}},ConfState},State)->
    {noreply,change_conf(ConfID,Old,New,ConfState,State)};
%% Peer update: replace the stored peer and recompute the quorums.
handle_cast(#peer{}=Peer,State)->
    {noreply,change_peer(Peer,State)};
handle_cast(_Request, State) ->
    {noreply, State}.

handle_info(_Info, State) ->
    {noreply, State}.


terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.


%% Store the new peer record in both peer lists (where it is present) and
%% re-evaluate vote, epoch and agree-index quorums, in that order.
change_peer(#peer{id = PeerID}=Peer,State=#state{old = OldPeers,new = NewPeers})->
    Updated = State#state{
        old = update(PeerID,Peer,OldPeers),
        new = update(PeerID,Peer,NewPeers)
    },
    change_last_agree_index(change_epoch(change_vote(Updated))).

%% Install a new configuration: merge the wanted peer ids with the peers
%% already tracked, then re-evaluate vote, epoch and agree-index quorums.
change_conf(ConfID,OldPeer,NewPeers,ConfState,State=#state{old = KnownOld,new = KnownNew})->
    Installed = State#state{
        conf_id = ConfID,
        conf_state = ConfState,
        old = merge(OldPeer,KnownOld),
        new = merge(NewPeers,KnownNew)
    },
    change_last_agree_index(change_epoch(change_vote(Installed))).

%% While leader: recompute the epoch quorum and notify raft when it moved.
change_epoch(State=#state{epoch_qourum = Current,raft_state = leader,conf_id = ConfID})->
    Quorum = quorumMin(State,#peer.epoch),
    if
        Quorum =:= Current ->
            State;
        true ->
            zraft_consensus:sync_peer(State#state.raft,{sync_epoch,ConfID,Quorum}),
            State#state{epoch_qourum = Quorum}
    end;
change_epoch(State)->
    State.
%% While leader: recompute the agree-index quorum and notify raft when it moved.
change_last_agree_index(State=#state{index_quorum = Current,raft_state = leader,conf_id = ConfID})->
    Quorum = quorumMin(State,#peer.last_agree_index),
    if
        Quorum =:= Current ->
            State;
        true ->
            zraft_consensus:sync_peer(State#state.raft,{sync_index,ConfID,Quorum}),
            State#state{index_quorum = Quorum}
    end;
change_last_agree_index(State)->
    State.

%% While candidate: recompute the vote quorum and notify raft when it changed.
change_vote(State=#state{vote_quorum = Current,raft_state = candidate,conf_id = ConfID})->
    Quorum = quorumAll(State,#peer.has_vote),
    if
        Quorum =:= Current ->
            State;
        true ->
            zraft_consensus:sync_peer(State#state.raft,{sync_vote,ConfID,Quorum}),
            State#state{vote_quorum = Quorum}
    end;
change_vote(State)->
    State.

%% Build fresh {Id, #peer{}} entries for a list of peer ids, keeping order.
create_peers(Peers)->
    [{PeerID,new_peer(PeerID)} || PeerID <- Peers].

%% A peer nothing is known about yet.
new_peer(PeerID)->
    #peer{id = PeerID,epoch = 0,has_vote = false,last_agree_index = 0,next_index = 1}.

%% Merge a list of wanted peer ids with the tracked {Id, #peer{}} list.
%% Both lists are walked in parallel by comparing their heads: ids missing
%% from the tracked list get a fresh entry, tracked peers that are not wanted
%% are dropped, and entries present in both keep their tracked state.
merge([], _Tracked)->
    [];
merge(Wanted, [])->
    create_peers(Wanted);
merge([ID|Wanted], [{TrackedID,_}|_]=Tracked) when ID < TrackedID ->
    [{ID,new_peer(ID)}|merge(Wanted, Tracked)];
merge([ID|_]=Wanted, [{TrackedID,_}|Tracked]) when ID > TrackedID ->
    merge(Wanted, Tracked);
merge([_ID|Wanted], [Entry|Tracked]) -> %% same id on both sides
    [Entry|merge(Wanted, Tracked)].

%% Quorum value of a numeric #peer{} field (`Field' is a record index).
%% Blank configuration (conf_id 0) yields 0; a transitional configuration
%% needs both peer sets, so the smaller of the two quorum values is used.
quorumMin(#state{conf_id = 0}, _Field) ->
    0;
quorumMin(#state{conf_state = ?TRANSITIONAL_CONF,old = Old,new = New}, Field) ->
    erlang:min(quorumMin1(Old, Field), quorumMin1(New, Field));
quorumMin(#state{old = Old}, Field) ->
    quorumMin1(Old, Field).

%% Quorum of a boolean #peer{} field. Blank configuration is always `true';
%% a transitional configuration needs a majority in both peer sets.
quorumAll(#state{conf_id = 0},_Field)->
    true;
quorumAll(#state{conf_state = ?TRANSITIONAL_CONF,old = Old,new = New}, Field) ->
    quorumAll1(Old, Field) andalso quorumAll1(New, Field);
quorumAll(#state{old = Old}, Field) ->
    quorumAll1(Old, Field).

%% Value at position ((Count - 1) div 2) + 1 of the ascending field values,
%% i.e. the largest value that a majority of the peers has reached.
quorumMin1([], _Field) ->
    0;
quorumMin1(Peers, Field) ->
    Collected = lists:foldl(
        fun({_,Peer}, Acc) -> [element(Field,Peer)|Acc] end, [], Peers),
    Position = (length(Collected) - 1) div 2 + 1,
    lists:nth(Position, lists:sort(Collected)).


%% True when more than half of the peers have the field set to `true'.
%% An empty peer set counts as agreement.
quorumAll1([],_Field) ->
    true;
quorumAll1(Peers, Field) ->
    Granted = lists:foldl(
        fun({_, Peer}, Count) ->
            case element(Field,Peer) of
                true -> Count + 1;
                _ -> Count
            end
        end, 0, Peers),
    Granted >= length(Peers) div 2 + 1.

%% Replace the value stored under `Key' in an ordered {Key,Value} list.
%% The walk stops at the first key that is not smaller than `Key'; when
%% `Key' is absent the list is returned unchanged.
update(Key,Value,[{Current,_}=Entry|Rest]) when Key > Current ->
    [Entry|update(Key,Value, Rest)];
update(Key,Value, [{Current,_Old}|Rest]) when Key == Current ->
    [{Key,Value}|Rest];
update(_,_,Unchanged)->
    Unchanged.
--------------------------------------------------------------------------------
/src/zraft_session.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_session).
-author("dreyk").

-behaviour(gen_server).

%% API
-export([
    start_link/2,
    start_link/3,
    query/3,
    query/4,
    write/3,
    write/4,
    stop/1
]).

-export_type([
    session/0
]).

-export([init/1,
    handle_call/3,
    handle_cast/2,
    handle_info/2,
    terminate/2,
    code_change/3]).

-include("zraft.hrl").


%% NOTE(review): none of these three macros is referenced in the visible part
%% of this module; PENDFING looks like a misspelling of PENDING.
-define(CONNECTING, connecting).
-define(CONNECTED, connected).
-define(PENDFING, pending).

%% A session is addressed by the pid of its gen_server.
-type session() :: pid().

%% gen_server state of a client session.
%%   session     - light session object created by zraft_client:light_session/3
%%   requests    - ETS ordered_set holding the in-flight requests:
%%                 reads  {MRef, Req, From, TRef, Epoch},
%%                 writes {ID, Req, From, TRef, MRef, Epoch} plus {MRef, ID}
%%   watchers    - ETS bag of {Caller, Watch} entries
%%   message_id  - id given to the next write request (starts at 1)
%%   acc_upto    - value sent as acc_upto with write/ping/close requests
%%   leader_mref - monitor reference of the current leader, or undefined
%%   epoch       - counter incremented when a leader is installed or the
%%                 connect reply arrives; requests stamped with an older
%%                 epoch are re-sent
%%   timer       - connect-timeout timer first, ping timer once connected
%%   timeout     - session timeout passed to start_link
%%   pending     - false, or the reference of the running `pending' timer
%%   connected   - true once the connect request has been acknowledged
%%   last_send   - os:timestamp() of the last connect/ping/write send
-record(state, {
    session,
    requests,
    watchers,
    message_id,
    acc_upto,
    leader_mref,
    epoch,
    timer,
    timeout,
    pending,
    connected,
    last_send}).


-spec query(session(), term(), timeout()) -> {ok, term()}.
%% Run a read query through the session without registering a watch.
query(Session, Query, Timeout) ->
    query(Session, Query, false, Timeout).
%% Run a read query; `Watch' is either `false' or a reference under which the
%% caller is registered as a watcher. The gen_server call itself waits for the
%% extended timeout (see inc_timeout/1), longer than the request timeout
%% handed to the session process.
-spec query(session(), term(), reference()|false, timeout()) -> {ok, term()}.
query(Session, Query, Watch, Timeout) ->
    gen_server:call(Session, {query, {Query, Watch, Timeout}}, inc_timeout(Timeout)).

%% Write data through the session with `Temporary' set to false.
-spec write(session(), term(), timeout()) -> {ok, term()}.
write(Session, Data, Timeout) ->
    write(Session, Data, false, Timeout).
%% Write data through the session; `Temporary' is forwarded in the write
%% request. The gen_server call waits for the extended timeout.
-spec write(session(), term(), true|false, timeout()) -> {ok, term()}.
write(Session, Data, Temporary, Timeout) ->
    gen_server:call(Session, {write, {Data, Temporary, Timeout}}, inc_timeout(Timeout)).

%% Extend an integer timeout by 50%; any other value (e.g. `infinity') is
%% returned unchanged.
-spec inc_timeout(timeout())->timeout().
inc_timeout(T) when is_integer(T)->
    round(T*1.5);
inc_timeout(T)->
    T.

%% Ask the session process to close the session and stop (asynchronous).
-spec stop(session()) -> ok.
stop(Session) ->
    gen_server:cast(Session, stop).

%% Start a session process registered locally as `Name'.
-spec start_link(atom(),zraft_consensus:peer_id()|list(zraft_consensus:peer_id()), timeout()) ->
    {ok, pid()} | {error, Reason :: term()}.
start_link(Name,Peer, Timeout) ->
    gen_server:start_link({local,Name},?MODULE, [Peer, Timeout], []).

%% Start an unregistered session process.
-spec start_link(zraft_consensus:peer_id()|list(zraft_consensus:peer_id()), timeout()) ->
    {ok, pid()} | {error, Reason :: term()}.
start_link(Peer, Timeout) ->
    gen_server:start_link(?MODULE, [Peer, Timeout], []).

%% Create the light session and the ETS tables, arm the connect-timeout timer
%% and send the first connect request.
init([Peer, Timeout]) ->
    ETime = zraft_consensus:get_election_timeout(),
    Session = zraft_client:light_session(Peer, ETime * 4, ETime*2),
    Requests = ets:new(client_session, [ordered_set, {write_concurrency, false}, {read_concurrency, false}]),
    Watchers = ets:new(client_watchers, [bag, {write_concurrency, false}, {read_concurrency, false}]),
    ConnectTimer = erlang:start_timer(Timeout, self(), connect_timeout),
    State = #state{
        session = Session,
        requests = Requests,
        watchers = Watchers,
        message_id = 1,
        epoch = 1,
        timeout = Timeout,
        connected = false,
        timer = ConnectTimer,
        pending = false
    },
    State1 = connect(State),
    {ok, State1}.


%% Only write and query calls are understood; there is no catch-all clause.
handle_call({write, Req}, From, State) ->
    handle_write(Req, From, State);
handle_call({query, Req}, From, State) ->
    handle_query(Req, From, State).

%% `stop' sends the close request and terminates normally.
handle_cast(stop, State) ->
    stop_session(State),
    {stop, normal, State};
handle_cast(_Request, State) ->
    {noreply, State}.



%% Write rejected because the addressed peer is not the leader.
handle_info(#swrite_error{sequence = Sequence, error = not_leader, leader = NewLeader}, State) ->
    %% Re-send only the request that failed; the others may still succeed.
    case change_leader(NewLeader, State) of
        {false, State1} ->
            {noreply, State1};
        {true, State1} ->
            %% NOTE(review): there is no fall-through branch, so any other
            %% sequence value would raise if_clause here.
            if
                Sequence == ?CLIENT_PING ->
                    NewState = repeat_ping(State1);
                Sequence == ?CLIENT_CONNECT ->
                    NewState = connect(State1);
                is_integer(Sequence) ->
                    NewState = repeat_write(Sequence, State1)
            end,
            {noreply, NewState}
    end;


%% Ping acknowledgements need no handling.
handle_info(#swrite_reply{sequence = ?CLIENT_PING}, State) ->
    {noreply, State};

handle_info(#swrite_reply{sequence = ?CLIENT_CONNECT}, State = #state{connected = true}) ->
    %% The connect request may have been sent twice; ignore the second reply.
    {noreply,State};
%% First connect acknowledgement: stop the connect timer, mark the session
%% connected, bump the epoch so queued requests are (re)sent, start pinging.
handle_info(#swrite_reply{sequence = ?CLIENT_CONNECT}, State = #state{epoch = E,timer = TRef}) ->
    _ = cancel_timer(TRef),
    State1 = restart_requets(State#state{connected = true,epoch = E+1,timer=undefined}),
    State2 = ping(State1),
    {noreply, State2};

%% Reply to a regular write request.
handle_info(#swrite_reply{sequence = ID, data = Result}, State) when is_integer(ID) ->
    NewState = write_reply(ID, Result, State),
    {noreply, NewState};


%% Successful read reply.
handle_info({{read, ReadRef}, #sread_reply{data = Data}}, State) ->
    NewState = read_reply(ReadRef, Data, State),
    {noreply, NewState};
handle_info({{read, ReadRef}, {leader, NewLeader}}, State) ->
    %% Repeat only this request; the other requests are restarted by the
    %% session's change-leader event.
    case change_leader(NewLeader, State) of
        {false,State1}->
            {noreply,State1};
        {true,State1}->
            NewState = repeat_read(ReadRef, State1) ,
            {noreply, NewState}
    end;
%% Any other read result: `{ok, R}' is unwrapped, everything else is passed
%% to the caller unchanged.
handle_info({{read, ReadRef}, Data}, State) ->
    Reply = case Data of
                {ok, R1} ->
                    R1;
                _ ->
                    Data
            end,
    NewState = read_reply(ReadRef, Reply, State),
    {noreply, NewState};
%% A watch fired: forward it to the caller under the caller's own reference
%% and drop the watcher entry.
handle_info(#swatch_trigger{ref = {Caller, Watch}, reason = Reson}, State = #state{watchers = Watcher}) ->
    Caller ! #swatch_trigger{ref = Watch, reason = Reson},
    ets:delete_object(Watcher, {Caller, Watch}),
    {noreply, State};

%% Disconnect message: stop with that message as the reason.
handle_info(?DISCONNECT_MSG, State) ->
    {stop, ?DISCONNECT_MSG, State};

%% Explicit leader announcement.
handle_info({leader, NewLeader}, State) ->
    lager:warning("Leader changed to ~p", [NewLeader]),
    case set_leader(NewLeader,State) of
        {false,State1}->
            {noreply, State1};
        {true,State1}->
            State2 = restart_requets(State1),
            {noreply, State2}
    end;
%% The monitored leader went down: fail over to another peer.
handle_info({'DOWN', Ref, process, _, _}, State = #state{leader_mref = Ref}) ->
    lager:warning("Current leader has failed"),
    State1 = State#state{leader_mref = undefined},
    case change_leader(failed, State1) of
        {false,State2}->
            {noreply,State2};
        {true,State2}->
            State3 = restart_requets(State2),
            {noreply, State3}
    end;
%% Any other 'DOWN' is handled as a monitored caller going away.
handle_info({'DOWN', Ref, process, Caller, _}, State) ->
    State1 = caller_down(Caller, Ref, State),
    {noreply, State1};

%% The pending timer fired: reset the session object and try to install a
%% leader again.
handle_info({timeout, TimerRef, pending}, State = #state{pending = TimerRef}) ->
    FreashSession = zraft_session_obj:reset(State#state.session),
    case install_leader(State#state{session = FreashSession,pending = false}) of
        {false,State1}->
            {noreply,State1};
        {true,State1}->
            State2 = restart_requets(State1),
            {noreply,State2}
    end;
handle_info({timeout, TimerRef, ping_timeout}, State = #state{timer = TimerRef}) ->
    State1 = ping(State),
    {noreply, State1};
%% No connect acknowledgement before the connect timer expired.
handle_info({timeout, TimerRef, connect_timeout}, State = #state{timer = TimerRef}) ->
    lager:warning("Connection timeout"),
    {stop, connection_timeout, State};
%% Per-request timer.
handle_info({timeout, TimerRef, {request, ReqRef}}, State) ->
    State1 = request_timeout(TimerRef, ReqRef, State),
    {noreply, State1};

handle_info(_Info, State) ->
    {noreply, State}.


terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.


%% Enter the pending state by starting the `pending' timer (twice the election
%% timeout), unless one is already running.
pending(State = #state{pending = P}) when P /= false ->
    State;
pending(State) ->
    NewTimer = erlang:start_timer(zraft_consensus:get_election_timeout()*2, self(), pending),
    State#state{pending = NewTimer}.


%% Re-arm the ping timer. The ping interval is half the session timeout; a
%% ping is sent only if nothing was sent during the last interval, otherwise
%% the timer is set to the remaining time returned by zraft_util:is_expired/2.
ping(State = #state{timer = Timer, timeout = Timeout,last_send = Last}) ->
    _ = cancel_timer(Timer),
    PingTimeout = Timeout div 2,
    case zraft_util:is_expired(Last,PingTimeout) of
        true->
            NewTimer = erlang:start_timer(PingTimeout, self(),ping_timeout),
            repeat_ping(State#state{timer = NewTimer});
        {false,T1}->
            NewTimer = erlang:start_timer(T1, self(),ping_timeout),
            State#state{timer = NewTimer}

    end.
%% Send a ping write request and record the send time.
repeat_ping(State = #state{acc_upto = To})->
    Req = #swrite{message_id = ?CLIENT_PING, acc_upto = To, from = self(), data = <<>>},
    write_to_raft(Req, State),
    State#state{last_send = os:timestamp()}.

%% Send the connect request to the current leader unless already connected.
%% install_leader/1 may switch to another peer (when the first one is found
%% dead) and clears the pending flag, so the request must be written with the
%% state it returns. Writing with the state from before the call could
%% address the previous leader, or drop the request because of a stale
%% pending flag.
connect(State = #state{connected = true}) ->
    State;
connect(State = #state{timeout = Timeout}) ->
    case install_leader(State) of
        {false,State1}->
            State1;
        {true,State1}->
            Req = #swrite{message_id = ?CLIENT_CONNECT, acc_upto = 0, from = self(), data = Timeout},
            %% `true' forces the write although the session is not connected yet.
            write_to_raft(true,Req, State1),
            State1#state{last_send = os:timestamp()}
    end.

%% Cancel the session timer and send the close request, forced in the same
%% way as the connect request.
stop_session(State = #state{acc_upto = To, timer = Timer}) ->
    _ = cancel_timer(Timer),
    Req = #swrite{message_id = ?CLIENT_CLOSE, from = self(), data = <<>>, acc_upto = To},
    write_to_raft(true,Req, State).


%% A request timer fired: answer the caller with {error, timeout} and remove
%% the bookkeeping entries. The stored timer reference must match the one
%% that fired; otherwise the message is stale and ignored.
request_timeout(TRef, ReqRef, State = #state{requests = Requests}) ->
    case ets:lookup(Requests, ReqRef) of
        %% Read request: the key is the caller monitor reference itself.
        [{ReqRef, _, From, TRef, _}] ->
            gen_server:reply(From, {error, timeout}),
            erlang:demonitor(ReqRef),
            ets:delete(Requests, ReqRef),
            State;
        %% Write request: the key is the message id, and a reverse
        %% {MRef, ID} entry exists as well.
        [{ReqRef, _Req, From, TRef, MRef, _E}] ->
            gen_server:reply(From, {error, timeout}),
            erlang:demonitor(MRef),
            ets:delete(Requests, ReqRef),
            ets:delete(Requests, MRef),
            update_upto(State);
        _ ->
            State
    end.

%% A monitored caller died: drop its watchers and the request that the
%% monitor reference belongs to, cancelling the request timer.
caller_down(Caller, MRef, State = #state{requests = Requests, watchers = Watchers}) ->
    ets:delete(Watchers, Caller),
    case ets:lookup(Requests, MRef) of
        %% Read request keyed by the monitor reference.
        [{MRef, _, _From, TRef, _}] ->
            _ = cancel_timer(TRef),
            ets:delete(Requests, MRef),
            State;
        %% Reverse entry of a write request: follow it to the request itself.
        [{MRef, ID}] ->
            case ets:lookup(Requests, ID) of
                [{ID, _Req, _From, TRef, MRef, _E}] ->
                    _ = cancel_timer(TRef),
                    ets:delete(Requests, ID),
                    ets:delete(Requests, MRef),
                    update_upto(State);
                _ ->
                    State
            end;
        _ ->
            State
    end.

%% Register a read request and forward it to the leader.
%% The caller is monitored, and the monitor reference doubles as the request
%% key. When a watch is requested it is stored as {Caller, Watch}, so a later
%% trigger can be routed back to the caller.
handle_query({Query, Watch, Timeout}, From, State = #state{requests = Requests, epoch = E}) ->
    {Caller, _} = From,
    Req = case Watch of
              false ->
                  {Query, Watch, Timeout};
              _ ->
                  {Query, {Caller, Watch}, Timeout}
          end,
    MRef = erlang:monitor(process, Caller),
    TRef = start_request_timer(Timeout, MRef),
    read_from_raft(MRef, Req, State),
    ets:insert(Requests, {MRef, Req, From, TRef, E}),
    {noreply, State}.

%% Start the per-request timer.
%% The public API accepts timeout(), which includes `infinity', but
%% erlang:start_timer/3 raises badarg for it and would crash the session
%% process. For `infinity' no timer is started and `undefined' is stored
%% instead; cancel_timer/1 in this module ignores `undefined'.
%% NOTE(review): `infinity' is still forwarded to zraft_consensus:async_query/5
%% as part of the request - confirm that it is accepted there.
start_request_timer(infinity, _ReqRef) ->
    undefined;
start_request_timer(Timeout, ReqRef) ->
    erlang:start_timer(Timeout, self(), {request, ReqRef}).

%% Re-send a read request if it was last sent in an older epoch.
repeat_read(Ref, State = #state{requests = Requests, epoch = E}) ->
    case ets:lookup(Requests, Ref) of
        [{Ref, Req, _From, _TRef, E0}] when E0 < E ->
            %% Stamp the entry with the current epoch (5th element) first, so
            %% the request is not re-sent twice within the same epoch.
            ets:update_element(Requests, Ref, {5, E}),
            read_from_raft(Ref, Req, State),
            State;
        _ ->
            State
    end.

%% Deliver a read result to the caller, register its watcher (if any) and
%% remove the request bookkeeping.
read_reply(Ref, Result, State = #state{requests = Requests}) ->
    case ets:lookup(Requests, Ref) of
        [{Ref, Req, From, TRef, _Epoch}] ->
            register_watcher(Req, State),
            gen_server:reply(From, Result),
            erlang:demonitor(Ref),
            _ = cancel_timer(TRef),
            ets:delete(Requests, Ref),
            State;
        _ ->
            State
    end.

%% Register a write request under the next message id and forward it to the
%% leader. Two entries are stored: the request itself keyed by id, and a
%% reverse {MRef, ID} entry used when the caller dies.
handle_write({Request, Temporary, Timeout}, From, State = #state{message_id = ID, acc_upto = To, requests = Requests, epoch = E}) ->
    Req = #swrite{message_id = ID, acc_upto = To, from = self(), temporary = Temporary, data = Request},
    write_to_raft(Req, State),
    {Caller, _} = From,
    TRef = start_request_timer(Timeout, ID),
    MRef = erlang:monitor(process, Caller),
    ets:insert(Requests, {ID, Req, From, TRef, MRef, E}),
    ets:insert(Requests, {MRef, ID}),
    {noreply, State#state{message_id = ID + 1,last_send = os:timestamp()}}.
%% Re-send a write request if it was last sent in an older epoch. The entry
%% is stamped with the current epoch (6th element) before sending, and the
%% request carries the current acc_upto value.
repeat_write(ID, State = #state{requests = Requests, acc_upto = Upto, epoch = Epoch}) ->
    case ets:lookup(Requests, ID) of
        [{ID, Stored, _From, _TRef, _MRef, SentEpoch}] when SentEpoch < Epoch ->
            ets:update_element(Requests, ID, {6, Epoch}),
            write_to_raft(Stored#swrite{acc_upto = Upto}, State),
            State#state{last_send = os:timestamp()};
        _ ->
            State
    end.

%% Deliver a write result to the caller, remove both bookkeeping entries and
%% recompute acc_upto. Replies for unknown ids are ignored.
write_reply(ID, Result, State = #state{requests = Requests}) ->
    case ets:lookup(Requests, ID) of
        [{ID, _Req, Caller, Timer, Monitor, _Epoch}] ->
            gen_server:reply(Caller, Result),
            erlang:demonitor(Monitor),
            _ = cancel_timer(Timer),
            ets:delete(Requests, ID),
            ets:delete(Requests, Monitor),
            update_upto(State);
        _ ->
            State
    end.

%% Send a write request only when the session is connected.
write_to_raft(Req, State = #state{connected = IsConnected}) ->
    write_to_raft(IsConnected,Req,State).

%% Send a write request to the current leader. Nothing is sent when the first
%% argument is `false' or while the session is in the pending state.
write_to_raft(false,_Req,_State) ->
    ok;
write_to_raft(_Force,Req, #state{pending = false,session = Session}) ->
    zraft_consensus:send_swrite(zraft_session_obj:leader(Session), Req);
write_to_raft(_Force,_Req, #state{}) ->
    ok.
%% Send a read request to the current leader. Nothing is sent while the
%% session is pending or not connected.
read_from_raft(Ref, {Query, Watch, Timeout}, #state{pending = false,connected = IsConnected,session = Session})
    when IsConnected =/= false ->
    zraft_consensus:async_query(zraft_session_obj:leader(Session), {read, Ref}, Watch, Query, Timeout);
read_from_raft(_,_, #state{pending = Pending,connected = IsConnected})
    when Pending /= false; IsConnected =:= false ->
    ok.

%% Remember the watch of a finished read request; requests without a watch
%% carry `false' and are skipped.
register_watcher({_, false, _}, _State) ->
    ok;
register_watcher({_, Watcher, _}, #state{watchers = Table}) ->
    ets:insert(Table, Watcher).

%% Recompute acc_upto: one less than the smallest outstanding integer request
%% id, or one less than the next message id when no integer key follows 0.
update_upto(State = #state{requests = Requests, message_id = NextID}) ->
    Upto = case ets:next(Requests, 0) of
               Oldest when is_integer(Oldest) ->
                   Oldest - 1;
               _ ->
                   NextID - 1
           end,
    State#state{acc_upto = Upto}.


%% Handle an explicit leader announcement.
%% Outside the pending state nothing changes if the announced leader is
%% already the current one. In every other case the announced leader is
%% stored in the session object and installed.
%% Returns {true, State} or {false, State}.
set_leader(NewLeader, State = #state{session = Session,pending = false}) ->
    case zraft_session_obj:leader(Session) of
        NewLeader ->
            {true,State};
        _ ->
            adopt_leader(NewLeader, State)
    end;
set_leader(NewLeader, State = #state{}) ->
    %% Pending state: always take over the announced leader.
    adopt_leader(NewLeader, State).

%% Store the announced leader in the session object and install it.
adopt_leader(NewLeader, State = #state{session = Session}) ->
    Updated = zraft_session_obj:set_leader(NewLeader,Session),
    install_leader(State#state{session = Updated}).

%% React to a leader hint.
%%   failed    - the current leader is marked as failed
%%   undefined - no hint, move on to the next candidate
%%   Peer      - a concrete hint. If it is already the current leader the
%%               result is {true, State} outside the pending state and
%%               {false, State} inside it.
change_leader(failed, State) ->
    change_leader_fn(fun zraft_session_obj:fail/1, State);
change_leader(undefined, State) ->
    change_leader_fn(fun zraft_session_obj:next/1, State);
change_leader(NewLeader, State = #state{session = Session,pending = Pending}) ->
    Current = zraft_session_obj:leader(Session),
    if
        Current =:= NewLeader, Pending == false ->
            {true,State};
        Current =:= NewLeader ->
            {false,State};
        true ->
            Switch = fun(S) -> zraft_session_obj:change_leader(NewLeader, S) end,
            change_leader_fn(Switch, State)
    end.

%% Apply a session-object transition and act on its outcome:
%% etimeout enters the pending state, all_failed terminates the process,
%% and a new session object is installed.
change_leader_fn(Transition, State = #state{session = Session}) ->
    case Transition(Session) of
        {error, etimeout} ->
            {false, pending(State)};
        {error, all_failed} ->
            exit({error, no_peer_available});
        Updated ->
            install_leader(State#state{session = Updated})
    end.

%% Start monitoring the leader chosen by the session object.
%% The previous leader monitor is released first. If the new monitor reports
%% 'DOWN' immediately the leader is treated as failed. Otherwise the pending
%% timer is cancelled, all watchers are triggered, and the epoch is bumped.
install_leader(State = #state{session = Session, leader_mref = Previous, epoch = Epoch}) ->
    _ = Previous =/= undefined andalso erlang:demonitor(Previous),
    Monitor = erlang:monitor(process, zraft_session_obj:leader(Session)),
    receive
        {'DOWN', Monitor, process, _, _} ->
            change_leader(failed, State)
    after 0 ->
        case State#state.pending of
            false ->
                ok;
            PendingTimer ->
                cancel_timer(PendingTimer)
        end,
        trigger_all_watcher(State),
        {true, State#state{leader_mref = Monitor, epoch = Epoch + 1,pending = false}}
    end.

%% Notify every registered watcher about the leader change and forget them.
trigger_all_watcher(#state{watchers = Watchers}) ->
    Notify = fun({Caller, Watch}, Done) ->
        Caller ! #swatch_trigger{ref = Watch, reason = change_leader},
        Done
    end,
    ets:foldl(Notify, 0, Watchers),
    ets:delete_all_objects(Watchers).

%% Re-send every request stamped with an older epoch, writes first, then
%% reads. A session that is not connected yet only repeats the connect.
restart_requets(State = #state{connected = false})->
    connect(State);
restart_requets(State = #state{requests = Requests, epoch = Epoch}) ->
    %% Keys of all entries matching Pattern whose epoch ('$2') is older.
    Stale = fun(Pattern) ->
        ets:select(Requests, [{Pattern, [{'<', '$2', {const, Epoch}}], ['$1']}])
    end,
    AfterWrites = lists:foldl(fun repeat_write/2, State,
        Stale({'$1', '_', '_', '_', '_', '$2'})),
    lists:foldl(fun repeat_read/2, AfterWrites,
        Stale({'$1', '_', '_', '_', '$2'})).

%% Cancel a timer; `undefined' stands for "no timer" and is ignored.
cancel_timer(undefined) ->
    ok;
cancel_timer(Timer) ->
    erlang:cancel_timer(Timer).
--------------------------------------------------------------------------------
/src/zraft_session_obj.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_session_obj).
-author("dreyk").

-export([
    create/3,
    set_leader/2,
    change_leader/2,
    next/1,
    fail/1,
    reset/1,
    leader/1,
    is_session/1
]).

-export_type([
    light_session/0
]).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

%% Marker stored in a peer status slot that carries no timestamp.
-define(NORMAL, ok).

%% Client-side view of a raft cluster.
%%   peers            - list of {PeerId, {FailStamp, SkipStamp}}; each stamp is
%%                      ?NORMAL or an os:timestamp() value
%%   leader           - peer currently believed to be the leader
%%   backoff          - time compared against FailStamp, in microseconds
%%   election_timeout - time compared against SkipStamp, in microseconds
-record(light_session, {peers, leader, backoff,election_timeout}).

-type light_session() :: #light_session{}.

%% True when the argument is a light session record.
is_session(#light_session{}) ->
    true;
is_session(_) ->
    false.

-spec create(list(zraft_consensus:peer_id()), timeout(),timeout()) -> light_session().
%% Create a session object for a non-empty list of peers.
%% The first supplied peer becomes the initial leader. The other functions of
%% this module treat the head of `peers' as the current leader, so the sorted
%% peer list is rotated to put that peer first. Without the rotation an
%% unsorted input would leave another peer at the head, and fail/1 or next/1
%% would stamp the wrong peer.
%% `BackOff' and `ElectionTimeout' are multiplied by 1000 and later compared
%% with timer:now_diff/2 results, which are in microseconds.
%% Throws {error, no_peers} for an empty list.
create([], _BackOff,_ElectionTimeout) ->
    throw({error, no_peers});
create([F | _] = Peers, BackOff,ElectionTimeout) ->
    PeersStatus = [{P, {?NORMAL,?NORMAL}} || P <- Peers],
    #light_session{
        peers = reorder(F, orddict:from_list(PeersStatus), []),
        leader = F,
        backoff = BackOff * 1000,
        election_timeout = ElectionTimeout*1000
    }.

%% Make `NewLeader' the current leader unconditionally: rotate it to the head
%% of the peer list and clear its status.
-spec set_leader(zraft_consensus:peer_id(), light_session())->light_session().
set_leader(NewLeader,SObj = #light_session{peers = Peers})->
    [_|T] = reorder(NewLeader,Peers,[]),
    Peers1 = [{NewLeader,{?NORMAL,?NORMAL}}|T],
    SObj#light_session{peers = Peers1,leader = NewLeader}.

%% Follow a leader hint: stamp the second status slot of the current head
%% peer, rotate the hinted peer to the head and pick the first usable peer
%% from there.
-spec change_leader(zraft_consensus:peer_id(), light_session()) ->
    light_session()|{error, all_failed}|{error,etimeout}.
change_leader(NewLeader,SObj = #light_session{peers = Peers})->
    [{Old,{O1,_O2}}|T1] = Peers,
    Peers1 = [{Old,{O1,os:timestamp()}}|T1],
    Peers2 = reorder(NewLeader,Peers1,[]),
    find_leader(SObj#light_session{peers = Peers2}).

%% Pick the first usable peer, starting at the head of the list.
%% A peer is `failed' while its first stamp is younger than the backoff,
%% `false' (temporarily skipped) while its second stamp is younger than the
%% election timeout, and usable (`true') otherwise.
%% Returns the updated session, {error, etimeout} when at least one peer was
%% only skipped, or {error, all_failed} when every peer is failed.
find_leader(SObj = #light_session{peers = Peers,backoff = BackOff,election_timeout = Election})->
    Check = fun({_,{N1,N2}})->
        case is_expired(BackOff,N1) of
            true->
                case is_expired(Election,N2) of
                    true->
                        true;
                    _->
                        false
                end;
            _->
                failed
        end
    end,
    case next_candidate(Check,Peers,[],failed) of
        {failed,_,_}->
            {error, all_failed};
        {false,_,_}->
            {error,etimeout};
        {true,Leader,Peer3}->
            SObj#light_session{leader = Leader,peers = Peer3}
    end.

%% Walk the peer list until `Check' accepts a peer.
%% The accepted peer gets a clean status and moves to the head; the peers
%% rejected before it are appended after the remaining ones. When nobody is
%% accepted the status is `false' if any peer was only skipped, else `failed'.
next_candidate(_Check,[],Acc,Status)->
    {Status,undefined,lists:reverse(Acc)};
next_candidate(Check,[{P,_}=E|T],Acc,Status)->
    case Check(E) of
        failed->
            next_candidate(Check,T,[E|Acc],Status);
        true->
            {true,P,[{P,{?NORMAL,?NORMAL}}|T]++lists:reverse(Acc)};
        false->
            next_candidate(Check,T,[E|Acc],false)
    end.

%% Rotate the peer list so that peer `P' is at the head; the entries skipped
%% on the way are appended at the end in their original order. If `P' is not
%% in the list a fresh entry for it is put in front.
reorder(P,[{P,_}|_]=H,Acc)->
    H++lists:reverse(Acc);
reorder(P,[E|T],Acc)->
    reorder(P,T,[E|Acc]);
reorder(P,[],Acc)->
    [{P,{?NORMAL,?NORMAL}}|lists:reverse(Acc)].

%% Give up on the current head peer without marking it failed: stamp its
%% second status slot and pick the next usable peer.
-spec next(light_session()) -> light_session()|{error, all_failed}|{error,etimeout}.
next(SObj = #light_session{peers = Peers}) ->
    [{Old,{O1,_O2}}|T1] = Peers,
    Peers1 = [{Old,{O1,os:timestamp()}}|T1],
    find_leader(SObj#light_session{peers = Peers1}).

%% Mark the current head peer as failed (first status slot) and pick the next
%% usable peer.
-spec fail(light_session()) -> light_session()|{error, all_failed}|{error,etimeout}.
fail(SObj = #light_session{peers = Peers}) ->
    [{Old,{_O1,O2}}|T1] = Peers,
    Peers1 = [{Old,{os:timestamp(),O2}}|T1],
    find_leader(SObj#light_session{peers = Peers1}).

%% Clear the status of every peer and make the smallest peer id the leader.
%% The cleared list is stored sorted so that the chosen leader is also the
%% head of `peers'. fail/1, next/1 and change_leader/2 stamp the head peer as
%% the current leader, so storing the old rotation would make them stamp a
%% peer other than the leader.
-spec reset(light_session()) -> light_session().
reset(SObj = #light_session{peers = Peers}) ->
    Peers1 = lists:keysort(1,[{P,{?NORMAL,?NORMAL}}||{P,_}<-Peers]),
    [{Leader,_}|_]=Peers1,
    SObj#light_session{peers = Peers1,leader = Leader}.


%% Peer currently believed to be the leader.
-spec leader(light_session()) -> zraft_consensus:peer_id().
leader(#light_session{leader = Leader}) ->
    Leader.


%% True when the stamp is ?NORMAL, or at least `Timeout' microseconds old.
is_expired(_Timeout,?NORMAL)->
    true;
is_expired(Timeout,V)->
    case timer:now_diff(os:timestamp(), V) of
        D when D >= Timeout ->
            true;
        _ ->
            false
    end.

-ifdef(TEST).

%% Walk a five-peer session through a full fail-over cycle: every fail/1 moves
%% the leader to the next peer in rotation until all peers are inside their
%% backoff window ({error, all_failed}); reset/1 then starts over at peer 1.
next_leader_test() ->
    S1 = create([1, 2, 3, 4, 5],100,100),
    L1 = leader(S1),
    ?assertEqual(1, L1),
    S2 = set_leader(3, S1),
    L2 = leader(S2),
    ?assertEqual(3, L2),
    S3 = fail(S2),
    L3 = leader(S3),
    ?assertEqual(4, L3),
    S4 = fail(S3),
    L4 = leader(S4),
    ?assertEqual(5, L4),
    %% The rotation wraps around to the peers listed before the leader.
    S5 = fail(S4),
    L5 = leader(S5),
    ?assertEqual(1, L5),
    S6 = fail(S5),
    L6 = leader(S6),
    ?assertEqual(2, L6),
    %% All five peers have now been failed within the backoff.
    Fail = fail(S6),
    ?assertEqual({error, all_failed}, Fail),
    S7 = reset(S6),
    L7 = leader(S7),
    ?assertEqual(1, L7).

-endif.
--------------------------------------------------------------------------------
/src/zraft_snapshot_receiver.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(zraft_snapshot_receiver).
-author("dreyk").

-behaviour(gen_fsm).

%% API
-export([start/2]).

%% gen_fsm callbacks
-export([init/1,
    handle_event/3,
    handle_sync_event/4,
    handle_info/3,
    terminate/3,
    code_change/4,
    listen/2,
    listen/3,
    prepare/2,
    prepare/3,
    fileinfo/2,
    fileinfo/3,
    filedata/2,
    filedata/3]).

-export([stop/1]).
-export([copy_to/4, copy_info/1, copy_files/4, discard_files_info/1]).

%% State timeouts in milliseconds: waiting for the start request or a
%% connection, and waiting for data.
-define(LISTEN_TIMEOUT, 30000).
-define(DATA_TIMEOUT, 10000).
%% One-byte acknowledgement sent back to the sender.
-define(NEXT_HEARBEAT, <<1:8>>).

-include("zraft.hrl").

%% Address the listener binds to: loopback in tests, otherwise taken from the
%% snapshot_listener_addr setting with "0.0.0.0" as default.
-ifdef(TEST).
-define(SNAPHOT_LISTENER_ADDR, "127.0.0.1").
-else.
-define(SNAPHOT_LISTENER_ADDR, zraft_util:get_env(snapshot_listener_addr, "0.0.0.0")).
-endif.

%% Logging helpers that prefix each message with print_id(State).
-define(INFO(State,S, As),?MINFO("~p: "++S,[print_id(State)|As])).
-define(INFO(State,S), ?MINFO("~p: "++S,[print_id(State)])).
-define(ERROR(State,S, As),?MERROR("~p: "++S,[print_id(State)|As])).
-define(ERROR(State,S), ?MERROR("~p: "++S,[print_id(State)])).
-define(DEBUG(State,S, As),?MDEBUG("~p: "++S,[print_id(State)|As])).
-define(DEBUG(State,S), ?MDEBUG("~p: "++S,[print_id(State)])).
-define(WARNING(State,S, As),?MWARNING("~p: "++S,[print_id(State)|As])).
-define(WARNING(State,S), ?MWARNING("~p: "++S,[print_id(State)])).

%% Receiver state.
%%   peer_id - id passed to start/2
%%   dir     - target directory received with the start request
%%   rsock   - socket of the accepted sender connection
%% NOTE(review): lsock, fd, size, expected_size and curfile are maintained by
%% code outside this part of the file - presumably listen socket, open file
%% and transfer progress; confirm there.
-record(state, {peer_id,dir, lsock, rsock, fd, size,expected_size, curfile}).

%% Start an unlinked receiver and ask it to listen for a snapshot that will be
%% stored in `Dir'. Returns {ok, {{Addr, Port}, Pid}}.
start(PeerID,Dir) ->
    {ok, Pid} = gen_fsm:start(?MODULE, [PeerID], []),
    {ok, {Addr, Port}} = gen_fsm:sync_send_event(Pid, {start, Dir}),
    {ok, {{Addr, Port}, Pid}}.

%% Stop the receiver synchronously.
stop(P) ->
    gen_fsm:sync_send_all_state_event(P, stop).

%% Wait in `prepare' for the start request, for at most ?LISTEN_TIMEOUT.
init([PeerID]) ->
    {ok, prepare, #state{peer_id = PeerID}, ?LISTEN_TIMEOUT}.


%% `prepare' accepts no asynchronous events: a timeout or any other event
%% stops the receiver.
prepare(timeout, State) ->
    ?ERROR(State,"No init info ~p msec.", [?LISTEN_TIMEOUT]),
    {stop, {error, timeout}, State};
prepare(_, State) ->
    {stop, {error, not_supported}, State}.

%% Synchronous `{start, Directory}' request: resolve the bind address and the
%% address advertised to the sender, then open the listener (listen_on/3).
%% When the bind address is the wildcard "0.0.0.0", the advertised default is
%% derived from the node name; `snapshot_receiver_addr' overrides it either way.
prepare({start, Directory}, _From, State) ->
    BindAddr = ?SNAPHOT_LISTENER_ADDR,
    Fallback = case BindAddr of
        "0.0.0.0" ->
            zraft_util:node_addr(node());
        Explicit ->
            Explicit
    end,
    Advertised = zraft_util:get_env(snapshot_receiver_addr, Fallback),
    case inet_parse:address(BindAddr) of
        {ok, Ip} ->
            listen_on(Advertised, Ip, State#state{dir = Directory});
        ParseError ->
            ?ERROR(State,"Can't start snapshot receiver listener on ~p: ~p.", [BindAddr, ParseError]),
            {stop, ParseError, ParseError, State}
    end;
prepare(_Unexpected, _From, State) ->
    {stop, {error, not_supported}, {error, not_supported}, State}.

%% `listen' state: waiting for the sender to connect. The accept itself arrives
%% as an `inet_async' message in handle_info/3; here only the timeout is handled.
listen(timeout, State) ->
    ?ERROR(State,"No connection to snapshot receiver during ~p msec.", [?LISTEN_TIMEOUT]),
    {stop, {error, timeout}, State};
listen(_Unexpected, State) ->
    {stop, {error, not_supported}, State}.
listen(_Unexpected, _From, State) ->
    {stop, {error, not_supported}, {error, not_supported}, State}.

%% `fileinfo' state: waiting for the next file header from the socket.
fileinfo(timeout, State) ->
    ?ERROR(State,"No data during ~p msec.", [?DATA_TIMEOUT]),
    {stop, {error, timeout}, State};
fileinfo(_Unexpected, State) ->
    {stop, {error, not_supported}, State}.
fileinfo(_Unexpected, _From, State) ->
    {stop, {error, not_supported}, {error, not_supported}, State}.

%% `filedata' state: waiting for more bytes of the current file.
filedata(timeout, State) ->
    ?ERROR(State,"No data during ~p msec.", [?DATA_TIMEOUT]),
    {stop, {error, timeout}, State};
filedata(_Unexpected, State) ->
    {stop, {error, not_supported}, State}.
filedata(_Unexpected, _From, State) ->
    {stop, {error, not_supported}, {error, not_supported}, State}.

%% No all-state asynchronous events are supported.
handle_event(_Event, _StateName, State) ->
    {stop, {error, not_supported}, State}.

%% The only supported all-state synchronous event is `stop' (see stop/1).
handle_sync_event(stop, _From, _StateName, State) ->
    {stop, normal, ok, State};
handle_sync_event(_Event, _From, _StateName, State) ->
    {stop, {error, not_supported}, {error, not_supported}, State}.

%% The asynchronous accept started in listen_on/3 has completed: take over the
%% peer socket and wait for the first file header.
handle_info({inet_async, _ListSock, _Ref, {ok, CliSocket}}, listen, State) ->
    inet_db:register_socket(CliSocket, inet_tcp),
    inet:setopts(CliSocket, [{active, once}, {packet, 4}, {linger, {true, 30}}]),
    {next_state, fileinfo, State#state{rsock = CliSocket}, ?DATA_TIMEOUT};

handle_info({tcp_closed, _Socket}, _StateName, State) ->
    ?WARNING(State,"Snapshot receiver socket closed."),
    {stop, tcp_closed, State};
handle_info({tcp_error, _Socket,Reason}, _StateName, State) ->
    ?WARNING(State,"Snaphot receiving socket error: ~p.",[Reason]),
    {stop, tcp_error, State};
%% End-of-transfer marker: a header with size 0 and the name "done".
handle_info({tcp, _, <<0:64, "done">>}, fileinfo, State) ->
    ok = gen_tcp:send(State#state.rsock, ?NEXT_HEARBEAT),
    {stop, normal, State};
%% File header. The `{packet, 4}' framing has already removed the length
%% prefix, so the payload is the 64-bit size followed by the relative name
%% (the layout built by send_fileinfo/3). A size of 0 announces a directory.
handle_info({tcp, _, MsgData}, fileinfo, State = #state{dir = Dir, rsock = Sock}) ->
    <<Size:64, Name/binary>> = MsgData,
    RelName = binary_to_list(Name),
    case is_safe_name(RelName) of
        true ->
            FName = filename:join(Dir, RelName),
            ?INFO(State,"Prepare receive file ~s of size ~p.", [FName, Size]),
            ok = gen_tcp:send(Sock, ?NEXT_HEARBEAT),
            accept_entry(FName, Size, State);
        false ->
            %% The name comes from the network; filename:join/2 would let an
            %% absolute name or ".." component escape the target directory.
            ?ERROR(State,"Reject unsafe snapshot file name ~p.", [RelName]),
            {stop, {error, invalid_file_name}, State}
    end;
%% Raw file bytes (socket is in `{packet, 0}' mode while in `filedata').
handle_info({tcp, _, MsgData}, filedata,
        State = #state{fd = FD, size = Size,expected_size = ESize,rsock = Sock, curfile = File}) ->
    ok = file:write(FD, MsgData),
    NewSize = Size + byte_size(MsgData),
    ?INFO(State,"Receive ~p of ~p bytes ~s", [NewSize,ESize,File]),
    if
        NewSize == ESize ->
            %% File complete: flush it, switch back to framed headers and
            %% acknowledge so the sender proceeds with the next entry.
            ok = file:datasync(FD),
            ok = file:close(FD),
            inet:setopts(Sock, [{active, once}, {packet, 4}]),
            ok = gen_tcp:send(Sock, ?NEXT_HEARBEAT),
            {next_state, fileinfo, State#state{fd = undefined, size = 0, curfile = undefined}, ?DATA_TIMEOUT};
        NewSize > ESize ->
            %% More bytes than announced: give up on the transfer.
            file:datasync(FD),
            file:close(FD),
            {stop, {error, invalid_size}, State#state{fd = undefined, size = 0}};
        true ->
            ok = inet:setopts(Sock, [{active, once}]),
            {next_state, filedata, State#state{size = NewSize}, ?DATA_TIMEOUT}
    end;

handle_info(_Info, _StateName, State) ->
    {stop, {error, not_supported}, State}.

%% True when Name is a relative path without ".." components, i.e. joining it
%% to the receive directory cannot point outside of that directory.
is_safe_name(Name) ->
    filename:pathtype(Name) =:= relative andalso
        not lists:member("..", filename:split(Name)).

%% Act on an accepted header: create the directory (size 0) or open the target
%% file and switch the socket to raw mode for its contents.
accept_entry(FName, 0, State = #state{rsock = Sock}) ->
    ok = zraft_util:make_dir(FName),
    ?INFO(State,"Receive ~s direcory has created.", [FName]),
    ok = inet:setopts(Sock, [{active, once}]),
    {next_state, fileinfo, State#state{fd = undefined, size = 0}, ?DATA_TIMEOUT};
accept_entry(FName, Size, State = #state{rsock = Sock}) ->
    case filename:dirname(FName) of
        "." ->
            ok;
        ParentDir ->
            ok = zraft_util:make_dir(ParentDir)
    end,
    %% `exclusive' makes the open fail if the file already exists.
    {ok, FD} = file:open(FName, [binary, write, exclusive, raw, delayed_write]),
    ok = inet:setopts(Sock, [{active, once}, {packet, 0}]),
    {next_state, filedata, State#state{fd = FD, size = 0,expected_size = Size, curfile = FName}, ?DATA_TIMEOUT}.

%% gen_fsm callback: release both sockets and any file still open.
terminate(Reason, _StateName, State) ->
    ?WARNING(State,"Receiver is being stoped. Reason is ~p",[Reason]),
    close_sock(State#state.rsock),
    close_sock(State#state.lsock),
    if
        State#state.fd /= undefined ->
            file:close(State#state.fd);
        true ->
            ok
    end,
    ok.

%% Close a socket that may never have been opened.
close_sock(undefined) ->
    ok;
close_sock(Sock) ->
    gen_tcp:close(Sock).

code_change(_OldVsn, StateName, State, _Extra) ->
    {ok, StateName, State}.

%% Open the listening socket on IpAddr and start an asynchronous accept; the
%% accepted connection is delivered later as an `inet_async' message handled
%% in handle_info/3. Replies `{ok, {RAddr, Port}}' to the caller of start/2,
%% where RAddr is the advertised address and Port the port actually bound
%% (`snapshot_listener_port' defaults to 0).
listen_on(RAddr, IpAddr, State) ->
    SockOpts = [{ip, IpAddr}, binary, {packet, 4}, {reuseaddr, true}, {keepalive, true}, {backlog, 1024}, {active, false}],
    ListenerPort = zraft_util:get_env(snapshot_listener_port, 0),
    case gen_tcp:listen(ListenerPort, SockOpts) of
        {ok, LSock} ->
            {ok, _Ref} = prim_inet:async_accept(LSock, -1),
            {ok, Port} = inet:port(LSock),
            {reply, {ok, {RAddr, Port}}, listen, State#state{lsock = LSock}, ?LISTEN_TIMEOUT};
        Err ->
            ?ERROR(State,"Can't start snapshot receiver listener on ~p:~p. Reason is ~p", [IpAddr,ListenerPort,Err]),
            {stop, Err, Err, State}
    end.

%% @doc Send every file found under Dir to the receiver at Addr:Port.
%% Returns `ok' or the first error encountered. The file handles opened by
%% copy_info/1 are always closed before returning, whatever the outcome.
copy_to(PeerID,Dir, Addr, Port) ->
    Files = copy_info(Dir),
    try
        copy_files(PeerID,Files, Addr, Port)
    after
        discard_files_info(Files)
    end.

%% @doc Connect to the receiver and stream the entries of CopyInfo (a list of
%% `{DestName, Size, FD}' as produced by copy_info/1), finishing with the
%% "done" marker. The handles in CopyInfo are NOT closed here; the caller owns
%% them (see discard_files_info/1). The socket is closed in all cases.
copy_files(PeerID,CopyInfo, Addr, Port) ->
    case gen_tcp:connect(Addr, Port, [binary, {active, false}, {packet, 0}, {linger, {true, 30}}]) of
        {ok, Sock} ->
            try
                case do_copy(PeerID,CopyInfo, Sock) of
                    ok ->
                        %% Size 0 + name "done" tells the receiver to finish.
                        send_fileinfo(0, "done", Sock);
                    Failed ->
                        Failed
                end
            after
                gen_tcp:close(Sock)
            end;
        ConnectError ->
            ConnectError
    end.


%% Send the entries one by one, stopping at the first failure.
do_copy(_PeerID,[], _Sock) ->
    ok;
do_copy(PeerID,[{File, Size, FD} | T], Sock) ->
    case copy_file(PeerID,Size, File, FD, Sock) of
        ok ->
            do_copy(PeerID,T, Sock);
        Else ->
            Else
    end.

%% Send one file: header first, then the contents via file:sendfile/5, then
%% wait for the receiver's acknowledgement.
%% Returns `ok', `{error, less_byte_sended}' when fewer bytes than announced
%% went out, or whatever error the header/sendfile/ack step produced.
copy_file(PeerID,Size, File, FD, Sock) ->
    case send_fileinfo(Size, File, Sock) of
        ok ->
            ?INFO(PeerID,"Transfer snapshot file ~s of ~p bytes.", [File, Size]),
            case catch file:sendfile(FD, Sock, 0, Size, []) of
                {ok, Size} ->
                    ?INFO(PeerID,"Snapshot file ~s has been transfered.", [File]),
                    check_next_hearbeat(Sock);
                {ok, BadSize} ->
                    ?INFO(PeerID,"Snapshot file ~s has been transfered wrong number of ~p bytes", [File, BadSize]),
                    {error, less_byte_sended};
                SendError ->
                    %% The id must be passed explicitly: with only two arguments
                    %% the 2-arity ?ERROR macro is selected and the argument list
                    %% ends up appended to the format string.
                    ?ERROR(PeerID,"Snapshot file ~s has failed to transfer. Reason is ~p.", [File, SendError]),
                    SendError
            end;
        HeaderError ->
            HeaderError
    end.

%% Send a file header and wait for the acknowledgement. The sending socket is
%% in `{packet, 0}' mode, so the 32-bit length prefix expected by the
%% receiver's `{packet, 4}' framing is written by hand: 8 bytes of size plus
%% the name.
send_fileinfo(Size, File, Sock) ->
    BName = list_to_binary(File),
    Packet = <<(byte_size(BName) + 8):32, Size:64, BName/binary>>,
    case gen_tcp:send(Sock, Packet) of
        ok ->
            check_next_hearbeat(Sock);
        Else ->
            Else
    end.

%% Wait up to ?DATA_TIMEOUT for the receiver's acknowledgement: the one-byte
%% ?NEXT_HEARBEAT payload behind its 4-byte length prefix.
check_next_hearbeat(Sock) ->
    case gen_tcp:recv(Sock, 5, ?DATA_TIMEOUT) of
        {ok, <<1:32, 1:8>>} ->
            ok;
        {ok, _} ->
            {error, invalid_next_hearbeat};
        Else ->
            Else
    end.

%% Build a destination-relative name; the top level has an empty prefix.
dest_name([], Suffix) ->
    Suffix;
dest_name(Preffix, Suffix) ->
    filename:join(Preffix, Suffix).

%% @doc Walk Dir recursively and return `[{DestName, Size, FD}]' sorted by
%% DestName, where DestName is relative to Dir and FD is an already opened raw
%% read handle. The caller must close the handles (discard_files_info/1).
copy_info(Dir) ->
    copy_info(Dir, [], []).
copy_info(SrcDir, DestDir, FilesAcc) ->
    {ok, Files} = file:list_dir(SrcDir),
    copy_info(Files, SrcDir, DestDir, FilesAcc).

%% Process the entries of one directory listing. Sub-directories are descended
%% into (they contribute no entry of their own); regular files are opened and
%% recorded as `{DestName, Size, FD}'. The finished list is key-sorted on the
%% destination name.
copy_info([], _SrcDir, _DestDir, Collected) ->
    lists:ukeysort(1, Collected);
copy_info([Entry | Rest], SrcDir, DestDir, Collected) ->
    SrcPath = filename:join(SrcDir, Entry),
    DestPath = dest_name(DestDir, Entry),
    Collected1 = case filelib:is_dir(SrcPath) of
        true ->
            copy_info(SrcPath, DestPath, Collected);
        _ ->
            {ok, Handle} = file:open(SrcPath, [read, raw, binary]),
            Bytes = filelib:file_size(SrcPath),
            [{DestPath, Bytes, Handle} | Collected]
    end,
    copy_info(Rest, SrcDir, DestDir, Collected1).

%% @doc Close every file handle held in a copy_info/1 result.
discard_files_info([{_, _, Handle} | Rest]) ->
    file:close(Handle),
    discard_files_info(Rest);
discard_files_info([]) ->
    ok.

%% Log prefix helper used by the logging macros: accepts either the FSM state
%% or a bare peer id.
print_id(#state{peer_id = PeerID})->
    PeerID;
print_id(PeerID)->
    PeerID.

-ifdef(TEST).
%% Fixture: start every test from an empty scratch directory.
setup() ->
    zraft_util:del_dir("test_fs_copy"),
    zraft_util:make_dir("test_fs_copy"),
    ok.
clear_setup(_) ->
    zraft_util:del_dir("test_fs_copy"),
    ok.

copy_snapshot_test_() ->
    {
        setup,
        fun setup/0,
        fun clear_setup/1,
        fun(_Fixture) ->
            [
                do_copy()
            ]
        end
    }.

%% Drives the wire protocol by hand against a real receiver: two directories,
%% one file in each, then the "done" marker.
do_copy() ->
    {"copy_snapshot", fun() ->
        SrcA = "test_fs_copy/test1",
        file:write_file(SrcA, <<"test1">>),
        SrcB = "test_fs_copy/test2",
        file:write_file(SrcB, <<"test2">>),
        Target = "test_fs_copy/copy",
        zraft_util:make_dir(Target),
        {ok, {{Addr, Port}, _Pid}} = start(test,Target),
        {ok, Sock} = gen_tcp:connect(Addr, Port, [binary, {active, false}, {packet, 0}, {linger, {true, 30}}]),
        make_dir_("d1", Sock),
        copy_file_(SrcA, "d1/test1", Sock),
        make_dir_("d2", Sock),
        copy_file_(SrcB, "d2/test2", Sock),
        %% 12 = 8 bytes of size + 4 bytes of "done".
        Done = <<12:32, 0:64, <<"done">>/binary>>,
        ok = gen_tcp:send(Sock, Done),
        DoneAck = gen_tcp:recv(Sock, 5, ?DATA_TIMEOUT),
        ?assertMatch({ok, <<1:32, 1:8>>}, DoneAck),
        GotA = file:read_file("test_fs_copy/copy/d1/test1"),
        ?assertMatch({ok, <<"test1">>}, GotA),
        GotB = file:read_file("test_fs_copy/copy/d2/test2"),
        ?assertMatch({ok, <<"test2">>}, GotB)
    end}.

%% Announce a directory (size 0) and expect the acknowledgement.
make_dir_(Name, Sock) ->
    NameBin = list_to_binary(Name),
    Header = <<(length(Name) + 8):32, 0:64, NameBin/binary>>,
    ok = gen_tcp:send(Sock, Header),
    Ack = gen_tcp:recv(Sock, 5, ?DATA_TIMEOUT),
    ?assertMatch({ok, <<1:32, 1:8>>}, Ack).
%% Announce a file, stream it, and expect an acknowledgement after the header
%% and another one after the contents.
copy_file_(SrcName, DestName, Sock) ->
    Bytes = filelib:file_size(SrcName),
    DestBin = list_to_binary(DestName),
    Header = <<(length(DestName) + 8):32, Bytes:64, DestBin/binary>>,
    ok = gen_tcp:send(Sock, Header),
    HeaderAck = gen_tcp:recv(Sock, 5, ?DATA_TIMEOUT),
    ?assertMatch({ok, <<1:32, 1:8>>}, HeaderAck),
    Sent = file:sendfile(SrcName, Sock),
    ?assertMatch({ok, _}, Sent),
    DataAck = gen_tcp:recv(Sock, 5, ?DATA_TIMEOUT),
    ?assertMatch({ok, <<1:32, 1:8>>}, DataAck).

copy_fs_test_() ->
    {
        setup,
        fun setup/0,
        fun clear_setup/1,
        fun(_Fixture) ->
            [
                do_copy_fs()
            ]
        end
    }.

big_fs_test_() ->
    {
        setup,
        fun setup/0,
        fun clear_setup/1,
        fun(_X) ->
            [
                big_copy_fs()
            ]
        end
    }.

%% Copies a small tree through copy_info/1 + copy_files/4. The source tree is
%% renamed after the handles were opened, so the transfer has to rely on the
%% already-open handles rather than on the original paths.
do_copy_fs() ->
    {"copy_fs", fun() ->
        zraft_util:make_dir("test_fs_copy/src/d1"),
        File1 = "test_fs_copy/src/d1/test1",
        file:write_file(File1, <<"test1">>),
        zraft_util:make_dir("test_fs_copy/src/d2"),
        File2 = "test_fs_copy/src/d2/test2",
        file:write_file(File2, <<"test2">>),
        File0 = "test_fs_copy/src/test0",
        file:write_file(File0, <<"test0">>),
        CopyTo = "test_fs_copy/copy",
        zraft_util:make_dir(CopyTo),
        {ok, {{Addr, Port}, _Pid}} = start(test,CopyTo),
        Files = copy_info("test_fs_copy/src"),
        ok = file:rename("test_fs_copy/src", "test_fs_copy/src1"),
        Res = copy_files(test,Files, Addr, Port),
        %% copy_files/4 leaves the handles open; release them here.
        discard_files_info(Files),
        ?assertEqual(ok, Res),
        C1 = file:read_file("test_fs_copy/copy/d1/test1"),
        ?assertMatch({ok, <<"test1">>}, C1),
        C2 = file:read_file("test_fs_copy/copy/d2/test2"),
        ?assertMatch({ok, <<"test2">>}, C2),
        C0 = file:read_file("test_fs_copy/copy/test0"),
        ?assertMatch({ok, <<"test0">>}, C0)
    end}.


%% Copies a file built from 1024 separate writes.
big_copy_fs() ->
    {"copy big file", fun() ->
        zraft_util:make_dir("test_fs_copy/src/d1"),
        File1 = "test_fs_copy/src/d1/test1",
        {ok, FD} = file:open(File1, [binary, raw, write]),
        %% NOTE(review): the binary written per iteration was lost in the
        %% extracted source; a 64-bit encoding of the counter is assumed here.
        %% Any non-empty payload exercises the same transfer path.
        lists:foreach(fun(I) ->
            ok = file:write(FD, <<I:64>>) end, lists:seq(1, 1024)),
        %% Close the writer before the file is reopened for transfer.
        ok = file:close(FD),
        CopyTo = "test_fs_copy/copy",
        zraft_util:make_dir(CopyTo),
        {ok, {{Addr, Port}, _Pid}} = start(test,CopyTo),
        Files = copy_info("test_fs_copy/src"),
        Res = copy_files(test,Files, Addr, Port),
        discard_files_info(Files),
        ?assertEqual(ok,Res)
    end}.

-endif.
-------------------------------------------------------------------------------- /src/zraft_snapshot_writer.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% @author Gunin Alexander 3 | %% Copyright (c) 2015 Gunin Alexander. All Rights Reserved. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %% ------------------------------------------------------------------- 20 | -module(zraft_snapshot_writer). 21 | -author("dreyk"). 22 | 23 | -behaviour(gen_server). 24 | 25 | %% API 26 | -export([start/5]). 27 | 28 | %% gen_server callbacks 29 | -export([init/1, 30 | handle_call/3, 31 | handle_cast/2, 32 | handle_info/2, 33 | terminate/2, 34 | code_change/3]). 35 | 36 | -export([ 37 | data_done/1, 38 | data/3, 39 | read_snapshot_info/1, 40 | stop/1 41 | ]). 42 | 43 | -include("zraft.hrl"). 44 | 45 | -ifdef(TEST). 46 | -include_lib("eunit/include/eunit.hrl"). 47 | -define(DATA_DIR, "test-snaphot"). 48 | -endif. 49 | 50 | -record(state, {raft,fsm,index,result_file,snap_dir,data_done=false,descr_done=false,descr_req}). 51 | 52 | data_done(Writer)-> 53 | gen_server:cast(Writer,data_done). 54 | 55 | data(Writer,DirName,Fun)-> 56 | gen_server:cast(Writer,{data,DirName,Fun}). 

%% @doc Start an unlinked snapshot writer process.
start(Raft, Fsm, LastIndex, ResultFile, SnapshotDir) ->
    InitArgs = [Raft, Fsm, LastIndex, ResultFile, SnapshotDir],
    gen_server:start(?MODULE, InitArgs, []).

%% @doc Synchronously stop the writer.
stop(Writer)->
    gen_server:call(Writer, stop).

%% gen_server callback: monitor the FSM process and request the snapshot
%% description from the consensus process; the answer arrives later as a
%% `{Ref, Result}' message tagged with the reference stored in `descr_req'.
init([Raft, Fsm, LastIndex, ResultFile, SnapshotDir]) ->
    ReqRef = make_ref(),
    erlang:monitor(process, Fsm),
    zraft_consensus:make_snapshot_info(Raft, {ReqRef, self()}, LastIndex),
    Initial = #state{
        raft = Raft,
        fsm = Fsm,
        index = LastIndex,
        result_file = ResultFile,
        snap_dir = SnapshotDir,
        descr_req = ReqRef
    },
    {ok, Initial}.

handle_call(stop, _From, State) ->
    {stop, normal, ok, State};
handle_call(_Request, _From, State) ->
    {reply, ok, State}.

%% `data_done' marks the data part as written; `{data, Dir, Fun}' first runs
%% Fun(Dir) (which must return `ok') and then marks it the same way.
handle_cast(data_done, State) ->
    maybe_finish(State#state{data_done = true});
handle_cast({data, DirName, Fun}, State) ->
    ok = Fun(DirName),
    maybe_finish(State#state{data_done = true});
handle_cast(_Request, State) ->
    {noreply, State}.

%% Any 'DOWN' message aborts the writer. A reply tagged with the pending
%% request reference carries the snapshot description (or an error).
handle_info({'DOWN', _, process, _, _Reason}, State)->
    {stop, {error, parent_exit}, State};
handle_info({Ref, Reply}, State = #state{descr_req = Ref}) ->
    Answered = State#state{descr_req = undefined},
    case Reply of
        #snapshot_info{term = 0}->
            {stop, {error, null_term}, Answered};
        #snapshot_info{conf = ?BLANK_CONF}->
            {stop, {error, empty_conf}, Answered};
        Descr = #snapshot_info{}->
            write_header(Descr, Answered);
        {error, Why}->
            {stop, {error, Why}, Answered}
    end;
handle_info(_Info, State) ->
    {noreply, State}.

%% On abnormal termination the partially written snapshot directory is removed.
terminate(normal, _State) ->
    %%zraft_util:del_dir(State#state.snap_dir),
    ok;
terminate(Reason, State) ->
    SnapDir = State#state.snap_dir,
    lager:error("Snapshot ~s failed:~p",[SnapDir,Reason]),
    zraft_util:del_dir(SnapDir),
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% Stop normally once both the header and the data have been written;
%% otherwise keep waiting for the missing part.
maybe_finish(#state{descr_done = true, data_done = true} = State)->
    #state{result_file = Archive, snap_dir = SnapDir} = State,
    maybe_make_snapshot_backup(Archive, SnapDir),
    {stop, normal, State};
maybe_finish(State)->
    {noreply, State}.

%% Zip the snapshot directory into the result file, unless none was requested.
maybe_make_snapshot_backup(undefined, _SnapDir)->
    ok;
maybe_make_snapshot_backup(Archive, SnapDir)->
    {ok, _} = zip:zip(Archive, [SnapDir]),
    ok.

%% Persist the snapshot description as the "info" file of the snapshot
%% directory and mark the header part as done.
write_header(Info, State)->
    HeaderFile = filename:join(State#state.snap_dir, "info"),
    WithHeader = State#state{descr_done = true},
    ok = write_header_file(Info, HeaderFile),
    maybe_finish(WithHeader).

write_header_file(#snapshot_info{} = Info, HeaderFile)->
    Encoded = term_to_binary(Info),
    file:write_file(HeaderFile, Encoded).

%% @doc Read the "info" file written by write_header/2.
%% Returns `{ok, #snapshot_info{}}' or `{error, invalid_snapshot_header}' when
%% the file cannot be read or does not decode to a #snapshot_info{} record.
read_snapshot_info(SnapshotDir)->
    HeaderFile = filename:join(SnapshotDir, "info"),
    case file:read_file(HeaderFile) of
        {ok, Content}->
            Decoded = (catch binary_to_term(Content)),
            case Decoded of
                #snapshot_info{} = Info->
                    {ok, Info};
                _->
                    {error, invalid_snapshot_header}
            end;
        ReadError->
            lager:error("Can't read snapshot header ~s: ~p",[HeaderFile,ReadError]),
            {error, invalid_snapshot_header}
    end.


-ifdef(TEST).
%% Fixture: recreate the scratch directory; it is deliberately left in place
%% afterwards (the cleanup call is commented out).
setup() ->
    zraft_util:del_dir(?DATA_DIR),
    zraft_util:make_dir(?DATA_DIR),
    ok.
clear_setup(_) ->
    %%zraft_util:del_dir(?DATA_DIR),
    ok.

write_snapshot_test_() ->
    {
        setup,
        fun setup/0,
        fun clear_setup/1,
        fun(_Fixture) ->
            [
                write()
            ]
        end
    }.
181 | 182 | write() -> 183 | {"write snapshot", fun() -> 184 | ResultFile = filename:join(?DATA_DIR,"tmp-1"), 185 | SnapshotDir = filename:join(?DATA_DIR,"dump-1"), 186 | DataDir = filename:join(SnapshotDir,"data"), 187 | ok = zraft_util:make_dir(DataDir), 188 | {ok,Writer} = start(self(),self(),10,ResultFile,SnapshotDir), 189 | WriterRef = monitor(process,Writer), 190 | zraft_snapshot_writer:data(Writer,DataDir,fun(ToDir)-> 191 | file:write_file(filename:join(ToDir,"dump"),<<"test">>) end), 192 | receive 193 | {'$gen_event',{make_snapshot_info,{ReqRef1,_},Index}}-> 194 | ?assertEqual(10,Index), 195 | Writer ! {ReqRef1,#snapshot_info{index = Index,conf = [],term = 1,conf_index = 1}}; 196 | Else-> 197 | ?assertMatch(bad_result,Else) 198 | after 2000-> 199 | ?assert(false) 200 | end, 201 | receive 202 | {'DOWN',WriterRef, process,_, Reason}-> 203 | ?assertEqual(normal,Reason); 204 | Else1-> 205 | ?assertMatch(bad_result,Else1) 206 | after 2000-> 207 | ?assert(false) 208 | end, 209 | Res = zip:unzip(ResultFile), 210 | ?assertMatch({ok, [_, _]}, Res), 211 | {ok, ResData} = Res, 212 | ?assertMatch(["test-snaphot/dump-1/data/dump", "test-snaphot/dump-1/info"], lists:sort(ResData)), 213 | {ok,SInfo} = read_snapshot_info("test-snaphot/dump-1"), 214 | ?assertMatch(#snapshot_info{index = 10,conf = [],term = 1,conf_index = 1},SInfo), 215 | {ok,C2} = file:read_file("test-snaphot/dump-1/data/dump"), 216 | ?assertEqual(<<"test">>,C2) 217 | end}. 218 | 219 | -endif. 220 | 221 | -------------------------------------------------------------------------------- /src/zraft_util.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% @author Gunin Alexander 3 | %% Copyright (c) 2015 Gunin Alexander. All Rights Reserved. 
4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %% ------------------------------------------------------------------- 20 | -module(zraft_util). 21 | -author("dreyk"). 22 | 23 | %% API 24 | -export([ 25 | peer_name/1, 26 | escape_node/1, 27 | node_name/1, 28 | get_env/2, 29 | random/1, 30 | start_app/1, 31 | del_dir/1, 32 | make_dir/1, 33 | node_addr/1, 34 | miscrosec_timeout/1, 35 | gen_server_cancel_timer/1, 36 | gen_server_cast_after/2, 37 | peer_id/1, 38 | set_test_dir/1, 39 | clear_test_dir/1, 40 | is_expired/2, 41 | random/2, 42 | now_millisec/0, 43 | timestamp_millisec/1, 44 | count_list/1, 45 | cycle_exp/1, 46 | peer_name_to_dir_name/1, 47 | format/2 48 | ]). 49 | 50 | now_millisec()-> 51 | {Mega,S,Micro} = os:timestamp(), 52 | (Mega*1000000+S)*1000+(Micro div 1000). 53 | timestamp_millisec({Mega,S,Micro})-> 54 | (Mega*1000000+S)*1000+(Micro div 1000). 55 | 56 | peer_name({Name,Node}) when is_atom(Name)-> 57 | atom_to_list(Name)++"-"++node_name(Node); 58 | peer_name({Name,Node})-> 59 | binary_to_list(base64:encode(term_to_binary(Name)))++"-"++node_name(Node). 60 | 61 | node_name(Node)-> 62 | escape_node(atom_to_list(Node)). 63 | 64 | escape_node([])-> 65 | []; 66 | escape_node([$@|T])-> 67 | [$_|escape_node(T)]; 68 | escape_node([$.|T])-> 69 | [$_|escape_node(T)]; 70 | escape_node([E|T])-> 71 | [E|escape_node(T)]. 


%% @doc Read Key from the `zraft_lib' application environment, falling back
%% to Default when it is not set.
get_env(Key, Default) ->
    case application:get_env(zraft_lib, Key) of
        {ok, Value} ->
            Value;
        _ ->
            Default
    end.

%% @doc Generate "random" number X, such that `0 <= X < N'.
%% The value is a hash of the VM's current I/O counters, so 0 is a possible
%% result.
-spec random(pos_integer()) -> non_neg_integer().
random(N) ->
    erlang:phash2(erlang:statistics(io), N).
%% @doc Same as random/1, with Prefix mixed into the hashed term.
-spec random(term(),pos_integer()) -> non_neg_integer().
random(Prefix,N) ->
    erlang:phash2({Prefix,erlang:statistics(io)}, N).

%% @doc Recursively delete Dir. A path that does not exist counts as success.
del_dir(Dir)->
    case del_dir1(Dir) of
        {error,enoent}->
            ok;
        ok->
            ok;
        Else->
            Else
    end.
%% Delete the children first, then the directory itself. When the path cannot
%% be listed it is treated as a plain file: delete it, then let the trailing
%% del_dir report `enoent', which del_dir/1 maps to `ok'.
del_dir1(Dir) ->
    case file:list_dir(Dir) of
        {ok, Files} ->
            lists:foreach(fun(F) ->
                del_dir1(filename:join(Dir, F)) end, Files),
            file:del_dir(Dir);
        _ ->
            file:delete(Dir),
            file:del_dir(Dir)
    end.

%% @doc Create Dir together with any missing parents. An already existing
%% directory is not an error. Exits with `{error, dir_undefined}' when the
%% path is `undefined' or starts with "undefined", which guards against
%% paths built from unset configuration values.
make_dir(undefined)->
    exit({error,dir_undefined});
make_dir("undefined"++_)->
    exit({error,dir_undefined});
make_dir(Dir) ->
    case make_safe(Dir) of
        ok ->
            ok;
        {error, enoent} ->
            %% A parent is missing: create it first, then retry.
            case filename:split(Dir) of
                [_, _ | _] = Parts ->
                    case make_dir(filename:join(lists:droplast(Parts))) of
                        ok ->
                            make_safe(Dir);
                        Else ->
                            Else
                    end;
                _ ->
                    %% No parent component left to create; report the error
                    %% instead of crashing on an empty path list.
                    {error, enoent}
            end;
        Else ->
            Else
    end.
%% file:make_dir/1 that treats "already exists" as success.
make_safe(Dir)->
    case file:make_dir(Dir) of
        ok->
            ok;
        {error,eexist}->
            ok;
        Else->
            Else
    end.

%% Based on: https://github.com/rabbitmq/rabbitmq-server/blob/master/src/rabbit_queue_index.erl#L542
%% Map a peer id to a name derived from its MD5 digest: the 128-bit digest is
%% printed in base 36 and returned as an atom (note: this creates a new atom
%% per distinct peer id).
peer_name_to_dir_name(PeerId) ->
    <<Num:128>> = erlang:md5(term_to_binary(PeerId)),
    list_to_atom(format("~.36B", [Num])).

%% Taken from: https://github.com/rabbitmq/rabbitmq-server/blob/master/src/rabbit_misc.erl#L649
%% io_lib:format/2 with the result flattened to a plain string.
format(Fmt, Args) ->
    lists:flatten(io_lib:format(Fmt, Args)).

%% @doc Host part of a node name (`name@host'). Falls back to "127.0.0.1" for
%% the host "nohost" and for names that do not split into exactly two parts.
node_addr(Node)->
    case string:tokens(atom_to_list(Node), "@") of
        [_Name, "nohost"]->
            "127.0.0.1";
        [_Name, Host]->
            Host;
        _->
            "127.0.0.1"
    end.

%% @doc Multiply an integer timeout by 1000; any other value (e.g. `infinity')
%% is returned unchanged.
miscrosec_timeout(Timeout) when is_integer(Timeout)->
    Timeout * 1000;
miscrosec_timeout(Other)->
    Other.

%% @doc Start a timer that delivers `{timeout, Ref, {'$zraft_timeout', Event}}'
%% to the calling process after Time; returns the timer reference.
gen_server_cast_after(Time, Event) ->
    Msg = {'$zraft_timeout', Event},
    erlang:start_timer(Time, self(), Msg).
%% @doc Cancel such a timer. When it has already fired, its message is removed
%% from the mailbox (result 0); `false' means there was nothing to remove.
gen_server_cancel_timer(Ref)->
    case erlang:cancel_timer(Ref) of
        false ->
            receive
                {timeout, Ref, _} -> 0
            after 0 ->
                false
            end;
        Remaining ->
            Remaining
    end.

-spec peer_id(zraft_consensus:from_peer_addr())->zraft_consensus:peer_id().
peer_id({PeerID, _})->
    PeerID.


%% @doc Test helper: recreate Dir and point both `log_dir' and `snapshot_dir'
%% of the `zraft_lib' environment at it.
set_test_dir(Dir)->
    del_dir(Dir),
    ok = make_dir(Dir),
    application:set_env(zraft_lib, log_dir, Dir),
    application:set_env(zraft_lib, snapshot_dir, Dir).
%% @doc Test helper: undo set_test_dir/1.
clear_test_dir(Dir)->
    application:unset_env(zraft_lib, log_dir),
    application:unset_env(zraft_lib, snapshot_dir),
    del_dir(Dir).

%% @doc Has Timeout (msec) elapsed since the os:timestamp() value Start?
%% Returns `true' or `{false, RemainingMsec}'; an `infinity' timeout never
%% expires.
is_expired(_Start, infinity)->
    {false, infinity};
is_expired(Start, Timeout)->
    LimitMicros = Timeout * 1000,
    ElapsedMicros = timer:now_diff(os:timestamp(), Start),
    if
        ElapsedMicros >= LimitMicros ->
            true;
        true ->
            {false, ((LimitMicros - ElapsedMicros) div 1000) + 1}
    end.

%% @doc Spin (without sleeping) until T msec have passed.
cycle_exp(T)->
    cycle_exp(os:timestamp(), T).
cycle_exp(Start, T)->
    case is_expired(Start, T) of
        {false, Remaining}->
            %%io:format("s ~p~n",[T1]),
            cycle_exp(os:timestamp(), Remaining);
        true->
            ok
    end.

%% @doc Start App; start_app/2 also starts dependencies reported as not started.
start_app(App)->
    start_app(App, ok).
216 | start_app(App,ok) -> 217 | io:format("starting ~s~n",[App]), 218 | case application:start(App) of 219 | {error,{not_started,App1}}-> 220 | start_app(App,start_app(App1,ok)); 221 | {error, {already_started, App}}-> 222 | ok; 223 | Else-> 224 | io:format("start result ~s - ~p~n",[App,Else]), 225 | Else 226 | end; 227 | start_app(_,Error) -> 228 | Error. 229 | 230 | count_list([])-> 231 | []; 232 | count_list([{E1,C1}|T])-> 233 | count_list(E1,C1,T). 234 | 235 | count_list(E1,C1,[{E1,C2}|T])-> 236 | count_list(E1,C1+C2,T); 237 | count_list(E1,C1,[{E2,C2}|T])-> 238 | [{E1,C1}|count_list(E2,C2,T)]; 239 | count_list(E1,C1,[])-> 240 | [{E1,C1}]. -------------------------------------------------------------------------------- /test/basic_zraft_progress.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% @author Gunin Alexander 3 | %% Copyright (c) 2015 Gunin Alexander. All Rights Reserved. 4 | %% 5 | %% This file is provided to you under the Apache License, 6 | %% Version 2.0 (the "License"); you may not use this file 7 | %% except in compliance with the License. You may obtain 8 | %% a copy of the License at 9 | %% 10 | %% http://www.apache.org/licenses/LICENSE-2.0 11 | %% 12 | %% Unless required by applicable law or agreed to in writing, 13 | %% software distributed under the License is distributed on an 14 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %% KIND, either express or implied. See the License for the 16 | %% specific language governing permissions and limitations 17 | %% under the License. 18 | %% 19 | %% ------------------------------------------------------------------- 20 | -module(basic_zraft_progress). 21 | -author("dreyk"). 22 | 23 | -ifdef(TEST). 24 | -include_lib("eunit/include/eunit.hrl"). 25 | -include("zraft.hrl"). 26 | 27 | append_test(P, E) -> 28 | gen_fsm:sync_send_event(P, {append_test, E}). 
%% Synchronously sends the `force_timeout` all-state event to the consensus
%% gen_fsm `P`. Callers in this module match the result against `true`.
force_timeout(P) ->
    gen_fsm:sync_send_all_state_event(P, force_timeout).

%% EUnit fixture setup: starts distribution as 'zraft_test@localhost'
%% (short names) and points zraft at the "test-data" directory.
setup_node() ->
    net_kernel:start(['zraft_test@localhost', shortnames]),
    zraft_util:set_test_dir("test-data"),
    ok.
%% EUnit fixture cleanup: clears the "test-data" directory used by
%% setup_node/0 and stops distribution.
stop_node(_) ->
    zraft_util:clear_test_dir("test-data"),
    net_kernel:stop(),
    ok.

%% EUnit generator: runs progress/0 inside the setup_node/stop_node fixture.
progress_test_() ->
    {
        setup,
        fun setup_node/0,
        fun stop_node/1,
        fun(_X) ->
            [
                progress()
            ]
        end
    }.
%% Scripted single-real-peer scenario. One real consensus process (`test`) is
%% driven against two fake peers whose ids carry the test process pid, so that
%% whatever the leader sends them can be read with check_progress/1 and
%% answered with fake_reply/3.
%% NOTE(review): delivery of peer messages to the test process as
%% `{PeerId, Msg}` is presumably done by zraft_peer_route - confirm there.
progress() ->
    {"progress", fun() ->
        PeerName = {test, node()},
        {ok, Peer} = zraft_consensus:start_link(PeerName, zraft_dict_backend),
        ok = zraft_consensus:initial_bootstrap(Peer),
        true = force_timeout(Peer),
        timer:sleep(100),
        Res1 = zraft_consensus:stat(Peer),
        %%after bootstrap and forced election timeout the peer leads term 2
        ?assertMatch(
            #peer_start{
                term = 2,
                state_name = leader,
                allow_commit = true,
                log_state = #log_descr{commit_index = 2,first_index = 1, last_index = 2, last_term = 2},
                leader = {test,_}
            },
            Res1
        ),
        Ref = make_ref(),
        Peer1 = {test1, {Ref, self()}},
        Peer2 = {test2, {Ref, self()}},
        %%set new config
        NewConfEntry = #entry{term = 2, index = 3, type = ?OP_CONFIG, data = #pconf{old_peers = [PeerName],
            new_peers = ordsets:from_list([PeerName, Peer1, Peer2])}},
        ok = append_test(Peer, [NewConfEntry]),
        Res2 = zraft_consensus:stat(Peer),
        %%transitional state
        ?assertMatch(
            #peer_start{
                term = 2,
                state_name = leader,
                log_state = #log_descr{commit_index = 2, first_index = 1, last_index = 3, last_term = 2},
                leader = {test,_},
                conf_state = ?TRANSITIONAL_CONF,
                conf = {3,#pconf{old_peers = [{test,_}],new_peers = [{test,_},{test1,_},{test2,_}]}},
                back_end = zraft_dict_backend
            },
            Res2
        ),
        Up1 = check_progress(Peer1),
        ?assertMatch({peer_up,{{test,_},_}},Up1),
        Up2 = check_progress(Peer2),
        ?assertMatch({peer_up,{{test,_},_}},Up2),
        %%check heartbeat
        Command1_1 = check_progress(Peer1),
        ?assertMatch(
            #append_entries{commit_index = 2, term = 2, entries = [], prev_log_index = 3, prev_log_term = 2},
            Command1_1
        ),
        Command2_1 = check_progress(Peer2),
        ?assertMatch(
            #append_entries{commit_index = 2, term = 2, entries = [], prev_log_index = 3, prev_log_term = 2},
            Command2_1
        ),
        %%reject heartbeat, our log is out of date
        Reply1 = #append_reply{success = false, term = 2, last_index = 0, agree_index = 0},
        fake_reply(Command1_1, Reply1, Peer1),
        fake_reply(Command2_1, Reply1, Peer2),
        Command1_2 = check_progress(Peer1),
        ?assertMatch(
            #append_entries{commit_index = 0, term = 2, entries = [], prev_log_index = 0, prev_log_term = 0},
            Command1_2
        ),
        Command2_2 = check_progress(Peer2),
        ?assertMatch(
            #append_entries{commit_index = 0, term = 2, entries = [], prev_log_index = 0, prev_log_term = 0},
            Command2_2
        ),
        %%accept new heartbeat
        Reply2 = #append_reply{success = true, term = 2, last_index = 0, agree_index = 0},
        fake_reply(Command1_2, Reply2, Peer1),
        fake_reply(Command2_2, Reply2, Peer2),
        Command1_3 = check_progress(Peer1),
        ?assertMatch(
            #append_entries{commit_index = 2, term = 2, prev_log_index = 0, prev_log_term = 0},
            Command1_3
        ),
        Command2_3 = check_progress(Peer2),
        ?assertMatch(
            #append_entries{commit_index = 2, term = 2, prev_log_index = 0, prev_log_term = 0},
            Command2_3
        ),
        %%must receive prev log
        ?assertMatch(
            [
                #entry{index = 1, term = 1, type = ?OP_CONFIG, data = #pconf{}},
                #entry{index = 2, term = 2, type = ?OP_NOOP},
                #entry{index = 3, term = 2, type = ?OP_CONFIG, data = #pconf{}}
            ],
            Command1_3#append_entries.entries
        ),
        ?assertMatch(
            [
                #entry{index = 1, term = 1, type = ?OP_CONFIG, data = #pconf{}},
                #entry{index = 2, term = 2, type = ?OP_NOOP},
                #entry{index = 3, term = 2, type = ?OP_CONFIG, data = #pconf{}}
            ],
            Command2_3#append_entries.entries
        ),
        %%replication ok
        Reply3 = #append_reply{success = true, term = 2, last_index = 3, agree_index = 3},
        fake_reply(Command1_3, Reply3, Peer1),
        fake_reply(Command2_3, Reply3, Peer2),
        Command1_4 = check_progress(Peer1),
        %%check commit message
        ?assertMatch(
            #append_entries{commit_index = 3, term = 2, prev_log_index = 3, prev_log_term = 2, entries = []},
            Command1_4
        ),
        Command2_4 = check_progress(Peer2),
        ?assertMatch(
            #append_entries{commit_index = 3, term = 2, prev_log_index = 3, prev_log_term = 2, entries = []},
            Command2_4
        ),
        Reply4 = #append_reply{success = true, term = 2, last_index = 3, agree_index = 3},
        fake_reply(Command1_4, Reply4, Peer1),
        fake_reply(Command2_4, Reply4, Peer2),
        Command1_5 = check_progress(Peer1),
        %%check new conf
        ?assertMatch(
            #append_entries{commit_index = 3, term = 2, prev_log_index = 3, prev_log_term = 2,
                entries = [#entry{index = 4, type = ?OP_CONFIG, term = 2}]},
            Command1_5
        ),
        Command2_5 = check_progress(Peer2),
        ?assertMatch(
            #append_entries{commit_index = 3, term = 2, prev_log_index = 3, prev_log_term = 2,
                entries = [#entry{index = 4, type = ?OP_CONFIG, term = 2}]},
            Command2_5
        ),
        timer:sleep(100),
        Res3 = zraft_consensus:stat(Peer),
        %%stable state
        ?assertMatch(
            #peer_start{
                term = 2,
                state_name = leader,
                log_state = #log_descr{commit_index = 3, first_index = 1, last_index = 4, last_term = 2},
                leader = {test,_},
                conf_state = ?STABLE_CONF,
                conf = {4,#pconf{new_peers = [],old_peers = [{test,_},{test1,_},{test2,_}]}}
            },
            Res3
        ),
        Reply5 = #append_reply{success = true, term = 2, last_index = 4, agree_index = 4},
        fake_reply(Command1_5, Reply5, Peer1),
        fake_reply(Command2_5, Reply5, Peer2),
        Command1_6 = check_progress(Peer1),
        %%check new conf commit
        ?assertMatch(
            #append_entries{commit_index = 4, term = 2, prev_log_index = 4, prev_log_term = 2,
                entries = []},
            Command1_6
        ),
        Command2_6 = check_progress(Peer2),
        ?assertMatch(
            #append_entries{commit_index = 4, term = 2, prev_log_index = 4, prev_log_term = 2,
                entries = []},
            Command2_6
        ),
        %%accept commit new conf
        Reply6 = #append_reply{success = true, term = 2, last_index = 4, agree_index = 4},
        fake_reply(Command1_6, Reply6, Peer1),
        fake_reply(Command2_6, Reply6, Peer2),

        Res4 = zraft_consensus:stat(Peer),
        ?assertMatch(
            #peer_start{
                term = 2,
                state_name = leader,
                log_state = #log_descr{commit_index = 4, first_index = 1, last_index = 4, last_term = 2},
                leader = {test,_},
                conf_state = ?STABLE_CONF,
                conf = {4,#pconf{new_peers = [],old_peers = [{test,_},{test1,_},{test2,_}]}}
            },
            Res4
        ),
        %%peer1 asks for votes in term 3
        VoteReq = #vote_request{term = 3, from = Peer1, last_index = 3, last_term = 2},
        ok = zraft_peer_route:cmd(PeerName, VoteReq),
        Command1_7 = check_progress(Peer1),
        %%leader must reject the vote request (granted = false)
        ?assertMatch(
            #vote_reply{commit = 4, granted = false, peer_term = 2, request_term = 3},
            Command1_7
        ),
        Ref1 = ref1,
        %%heartbeat from peer1 acting as leader of term 3
        NewLeaderHearbeat =
            #append_entries{
                prev_log_term = 2,
                prev_log_index = 4,
                entries = [],
                commit_index = 4,
                request_ref = Ref1,
                from = Peer1,
                term = 3
            },
        ok = zraft_peer_route:cmd(PeerName, NewLeaderHearbeat),
        Command1_8 = check_progress(Peer1),
        %%the heartbeat is accepted and the reply carries term 3
        ?assertMatch(
            #append_reply{last_index = 4, agree_index = 4, request_ref = ref1, success = true, term = 3},
            Command1_8
        ),
        %%start new election
        true = force_timeout(Peer),
        Command1_9 = check_progress(Peer1),
        ?assertMatch(
            #vote_request{last_term = 2, last_index = 4, term = 4},
            Command1_9
        ),
        Command2_9 = check_progress(Peer2),
        ?assertMatch(
            #vote_request{last_term = 2, last_index = 4, term = 4},
            Command2_9
        ),
        Res5 = zraft_consensus:stat(Peer),
        ?assertMatch(
            #peer_start{
                term = 4,
                state_name = candidate,
                log_state = #log_descr{commit_index = 4, first_index = 1, last_index = 4, last_term = 2},
                leader = undefined,
                conf_state = ?STABLE_CONF,
                conf = {4,#pconf{new_peers = [],old_peers = [{test,_},{test1,_},{test2,_}]}}
            },
            Res5
        ),
        %%reject and new election
        Reply7 = #vote_reply{request_term = 4, granted = false, peer_term = 4, commit = 4},
        fake_reply(Command1_9, Reply7, Peer1),
        Reply8 = #vote_reply{request_term = 4, granted = false, peer_term = 5, commit = 4},
        fake_reply(Command2_9, Reply8, Peer2),
        true = force_timeout(Peer),
        %%peer2 reported term 5, so the next vote request is expected in term 6
        Command1_10 = check_progress(Peer1),
        ?assertMatch(
            #vote_request{last_term = 2, last_index = 4, term = 6},
            Command1_10
        ),
        Command2_10 = check_progress(Peer2),
        ?assertMatch(
            #vote_request{last_term = 2, last_index = 4, term = 6},
            Command2_10
        ),
        Reply9 = #vote_reply{request_term = 6, granted = true, peer_term = 5, commit = 4},
        fake_reply(Command1_10, Reply9, Peer1),
        fake_reply(Command2_10, Reply9, Peer2),
        timer:sleep(500),
        Res6 = zraft_consensus:stat(Peer),
        %%leader again in term 6, log grew by one entry (index 5)
        ?assertMatch(
            #peer_start{
                term = 6,
                state_name = leader,
                log_state = #log_descr{commit_index = 4, first_index = 1, last_index = 5, last_term = 6},
                leader = {test,_},
                conf_state = ?STABLE_CONF,
                conf = {4,#pconf{new_peers = [],old_peers = [{test,_},{test1,_},{test2,_}]}}
            },
            Res6
        ),
        Command1_11 = check_progress(Peer1),
        ?assertMatch(
            #append_entries{commit_index = 4, prev_log_index = 4, prev_log_term = 2, entries = [], term = 6},
            Command1_11
        ),
        Command2_11 = check_progress(Peer2),
        ?assertMatch(
            #append_entries{commit_index = 4, prev_log_index = 4, prev_log_term = 2, entries = [], term = 6},
            Command2_11
        ),
        Reply10 = #append_reply{success = true, term = 6, last_index = 4, agree_index = 4},
        fake_reply(Command1_11, Reply10, Peer1),
        fake_reply(Command2_11, Reply10, Peer2),
        Command1_12 = check_progress(Peer1),
        %%the term-6 noop entry is replicated to both peers
        ?assertMatch(
            #append_entries{commit_index = 4, prev_log_index = 4, prev_log_term = 2, term = 6,
                entries = [#entry{index = 5, type = ?OP_NOOP, term = 6}]},
            Command1_12
        ),
        Command2_12 = check_progress(Peer2),
        ?assertMatch(
            #append_entries{commit_index = 4, prev_log_index = 4, prev_log_term = 2, term = 6,
                entries = [#entry{index = 5, type = ?OP_NOOP, term = 6}]},
            Command2_12
        ),
        ok = zraft_consensus:stop(PeerName)
    end}.

%% Waits up to 2 seconds for a `{Peer, V}` message addressed to the given fake
%% peer id and returns `V`; returns `{error, timeout}` if none arrives.
check_progress(Peer) ->
    receive
        {Peer, V} ->
            V
    after 2000 ->
        {error, timeout}
    end.

%% Answers a command on behalf of fake peer `Peer`. The reply is completed with
%% the command's epoch (and, for append_entries, its request_ref) and sent back
%% to the command's `from` via zraft_peer_route.
fake_reply(Command = #append_entries{epoch = E}, Reply, Peer) ->
    zraft_peer_route:reply_proxy(
        Command#append_entries.from,
        Reply#append_reply{from_peer = {Peer,self()}, request_ref = Command#append_entries.request_ref,epoch = E}
    );
fake_reply(Command = #vote_request{epoch = E}, Reply, Peer) ->
    zraft_peer_route:reply_consensus(
        Command#vote_request.from,
        Reply#vote_reply{from_peer = Peer,epoch = E}
    ).

-endif.

--------------------------------------------------------------------------------
/test/full_zraft_progress.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(full_zraft_progress).
-author("dreyk").

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-include("zraft.hrl").


%% Timeout (ms) passed to the synchronous zraft_consensus client calls.
-define(TIMEOUT, 10000).

%% Synchronously sends the `force_timeout` all-state event to the consensus
%% gen_fsm `P`. Callers in this module match the result against `true`.
force_timeout(P) ->
    gen_fsm:sync_send_all_state_event(P, force_timeout).

%% EUnit fixture setup: uses "full-test-data" as the zraft test directory,
%% sets max_log_count to 10 and starts distribution as 'zraft_test@localhost'.
setup_node() ->
    zraft_util:set_test_dir("full-test-data"),
    application:set_env(zraft_lib, max_log_count, 10),
    net_kernel:start(['zraft_test@localhost', shortnames]),
    ok.
%% EUnit fixture cleanup: undoes setup_node/0.
%% The directory cleared here must be the one setup_node/0 created. It used to
%% be "test-data" (the directory of the basic test), which left
%% "full-test-data" behind after every run.
stop_node(_) ->
    net_kernel:stop(),
    application:unset_env(zraft_lib, max_log_count),
    zraft_util:clear_test_dir("full-test-data"),
    ok.

%% EUnit generator: runs progress/0 inside the setup_node/stop_node fixture.
progress_test_() ->
    {
        setup,
        fun setup_node/0,
        fun stop_node/1,
        fun(_X) ->
            [
                progress()
            ]
        end
    }.
%% Three-real-peer scenario (30 s EUnit timeout): bootstrap test1 as leader,
%% grow the cluster to three peers, write data, wipe and restart followers so
%% they must catch up by log replication and by snapshot, then stop the old
%% leader, let the others elect a new one and check the old leader resyncs.
progress() ->
    {timeout,30,fun() ->
        PeerID1 = {test1, node()},
        {ok, Peer1} = zraft_consensus:start_link(PeerID1, zraft_dict_backend),
        PeerID2 = {test2, node()},
        {ok, Peer2} = zraft_consensus:start_link(PeerID2, zraft_dict_backend),
        PeerID3 = {test3, node()},
        {ok, Peer3} = zraft_consensus:start_link(PeerID3, zraft_dict_backend),
        ok = zraft_consensus:initial_bootstrap(Peer1),
        true = force_timeout(Peer1),
        ok = wait_leadership(Peer1,2,1),
        Res1 = zraft_consensus:stat(Peer1),
        ?assertMatch(
            #peer_start{
                term = 2,
                state_name = leader,
                log_state = #log_descr{commit_index = 2, first_index = 1, last_index = 2, last_term = 2},
                leader = {test1, _}
            },
            Res1
        ),
        %%test2 and test3 are not bootstrapped: blank followers with empty logs
        Res2 = zraft_consensus:stat(Peer2),
        ?assertMatch(
            #peer_start{
                term = 0,
                state_name = follower,
                log_state = #log_descr{commit_index = 0, first_index = 1, last_index = 0, last_term = 0},
                leader = undefined,
                conf_state = ?STABLE_CONF,
                conf = ?BLANK_CONF,
                snapshot_info = #snapshot_info{conf_index = 0, conf = ?BLANK_CONF, index = 0, term = 0}
            },
            Res2
        ),
        Res3 = zraft_consensus:stat(Peer3),
        ?assertMatch(
            #peer_start{
                term = 0,
                state_name = follower,
                log_state = #log_descr{commit_index = 0, first_index = 1, last_index = 0, last_term = 0},
                leader = undefined,
                conf_state = ?STABLE_CONF,
                conf = ?BLANK_CONF,
                snapshot_info = #snapshot_info{conf_index = 0, conf = ?BLANK_CONF, index = 0, term = 0}
            },
            Res3
        ),
        %%replace the single-peer configuration (index 1) with all three peers
        C1 = zraft_consensus:get_conf(PeerID1, ?TIMEOUT),
        ?assertMatch({ok, {1, [{test1, _}]}}, C1),
        C2 = zraft_consensus:set_new_configuration(PeerID1, 1, lists:usort([PeerID1, PeerID2, PeerID3]), ?TIMEOUT),
        ?assertMatch(ok, C2),
        Res4 = zraft_consensus:stat(Peer1),
        ?assertMatch(
            #peer_start{
                term = 2,
                state_name = leader,
                log_state = #log_descr{commit_index = 4, first_index = 1, last_index = 4, last_term = 2},
                leader = {test1, _}
            },
            Res4
        ),
        ok = wait_new_config(4, PeerID1, 1),
        Res5 = zraft_consensus:stat(Peer2),
        ?assertMatch(
            #peer_start{
                term = 2,
                state_name = follower,
                log_state = #log_descr{commit_index = 4, first_index = 1, last_index = 4, last_term = 2},
                leader = {test1, _}
            },
            Res5
        ),
        Res6 = zraft_consensus:stat(Peer3),
        ?assertMatch(
            #peer_start{
                term = 2,
                state_name = follower,
                log_state = #log_descr{commit_index = 4, first_index = 1, last_index = 4, last_term = 2},
                leader = {test1, _}
            },
            Res6
        ),
        %%a write sent to a follower is answered with the current leader
        W1 = zraft_consensus:write(PeerID2, {1, "1"}, ?TIMEOUT),
        ?assertMatch({leader, {test1, _}}, W1),
        W2 = zraft_consensus:write(PeerID1, {1, "1"}, ?TIMEOUT),
        ?assertMatch({ok,ok}, W2),
        ok = wait_success_read(1, PeerID1, 1),

        %%drop test2 peer and all its data
        ok = zraft_consensus:stop(Peer2),
        ok = zraft_util:del_dir("full-test-data/test2-zraft_test_localhost"),
        %%restart it with empty state
        {ok, Peer22} = zraft_consensus:start_link(PeerID2, zraft_dict_backend),
        %%wait for log replication
        ok = wait_follower_sync(5, 5, 2, PeerID2, Peer22, 1),
        R1 = zraft_consensus:query_local(PeerID2, fun(Dict) -> lists:ukeysort(1, dict:to_list(Dict)) end, ?TIMEOUT),
        ?assertMatch({ok,[{1, "1"}]}, R1),
        %%more writes; with max_log_count = 10 a snapshot at index 10 is expected
        [zraft_consensus:write(PeerID1, {I, integer_to_list(I)}, ?TIMEOUT) || I <- lists:seq(2, 8)],
        ok = wait_snapshot_done(10, Peer1, 1),
        Res7 = zraft_consensus:stat(Peer1),
        ?assertMatch(#log_descr{commit_index = 12, first_index = 11, last_index = 12, last_term = 2},Res7#peer_start.log_state),
        ?assertMatch(
            #peer_start{
                term = 2,
                state_name = leader,
                log_state = #log_descr{commit_index = 12, first_index = 11, last_index = 12, last_term = 2},
                leader = {test1, _},
                snapshot_info = #snapshot_info{index = 10, term = 2, conf_index = 4}
            },
            Res7
        ),
        %%drop test3 peer and all its data
        ok = zraft_consensus:stop(Peer3),
        ok = zraft_util:del_dir("full-test-data/test3-zraft_test_localhost"),
        %%restart it with empty state
        {ok, Peer32} = zraft_consensus:start_link(PeerID3, zraft_dict_backend),
        %%wait for log replication and snapshot
        ok = wait_follower_sync(12, 12, 2, PeerID3, Peer32, 1),

        R2 = zraft_consensus:query_local(PeerID3, fun(Dict) -> lists:ukeysort(1, dict:to_list(Dict)) end, ?TIMEOUT),
        ?assertMatch({ok,[{1, "1"}, {2, "2"}, {3, "3"}, {4, "4"}, {5, "5"}, {6, "6"}, {7, "7"}, {8, "8"}]}, R2),

        %%truncate old leader log
        %%both followers are stopped, so the writes below are appended to the
        %%leader's log (last_index 33) but cannot be committed (commit stays 12)
        ok = zraft_consensus:stop(Peer22),
        ok = zraft_consensus:stop(Peer32),

        [zraft_consensus:write_async(PeerID1, {I, integer_to_list(I)}) || I <- lists:seq(100, 120)],
        Res8 = zraft_consensus:stat(Peer1),
        ?assertMatch(
            #peer_start{
                term = 2,
                state_name = leader,
                log_state = #log_descr{commit_index = 12, first_index = 11, last_index = 33, last_term = 2},
                leader = {test1, _},
                snapshot_info = #snapshot_info{index = 10, term = 2, conf_index = 4}
            },
            Res8
        ),
        ok = zraft_consensus:stop(Peer1),
        {ok, Peer23} = zraft_consensus:start_link(PeerID2, zraft_dict_backend),
        {ok, Peer33} = zraft_consensus:start_link(PeerID3, zraft_dict_backend),
        {Leader, InTerm} = wait_leader(Peer23, 1),
        ?debugFmt("New leader ~p in term ~p", [Leader, InTerm]),
        %%start old leader
        {ok, Peer13} = zraft_consensus:start_link(PeerID1, zraft_dict_backend),
        %%New Index must be 12(old index)+1(NO_OP) after new leader was elected
        ok = wait_follower_sync(13, 13, InTerm, PeerID1, Peer13, 1),
        ok = zraft_consensus:stop(Peer13),
        ok = zraft_consensus:stop(Peer23),
        ok = zraft_consensus:stop(Peer33)
    end}.

%% Retry budget check used by the wait_* pollers below.
%% Returns `ok` while `Attempt` is still below `Max`; otherwise exits the test
%% process with `{error,to_many_attempts}`.
down_attempt(Attempt,Max) when Attempt < Max ->
    ok;
down_attempt(_Attempt,_Max)->
    exit({error,to_many_attempts}).
%% Polls stat/1 until `Peer` is leader with commit index `Commit`
%% (at most 20 attempts, counted from the given `Attempt`).
wait_leadership(Peer,Commit, Attempt) ->
    down_attempt(Attempt,20),
    case zraft_consensus:stat(Peer) of
        #peer_start{state_name = leader,log_state = #log_descr{commit_index = Commit}} ->
            ok;
        #peer_start{} ->
            ?debugFmt("Wait leadership attempt - ~p",[Attempt]),
            wait_leadership(Peer,Commit, Attempt + 1)
    end.
%% Polls stat/1 every 500 ms until `Peer` either is the leader or is a
%% follower that knows a leader; returns `{Leader, Term}` as reported.
wait_leader(Peer, Attempt) ->
    timer:sleep(500),
    down_attempt(Attempt,20),
    case zraft_consensus:stat(Peer) of
        #peer_start{state_name = leader, leader = L, term = T} ->
            {L, T};
        #peer_start{state_name = follower, leader = L, term = T} when L /= undefined ->
            {L, T};
        #peer_start{state_name = S, leader = L} ->
            ?debugFmt("Current state ~s,leader is ~p. Make leader attempt - ~p", [S, L, Attempt]),
            wait_leader(Peer, Attempt + 1)
    end.

%% Polls get_conf/2 until the configuration reported for `PeerID` has index
%% `Index` (at most 30 attempts).
wait_new_config(Index, PeerID, Attempt) ->
    down_attempt(Attempt,30),
    case zraft_consensus:get_conf(PeerID, ?TIMEOUT) of
        {ok, {Index, _}} ->
            ok;
        _ ->
            ?debugFmt("Wait config attempt - ~p", [Attempt]),
            wait_new_config(Index, PeerID, Attempt + 1)
    end.

%% Polls query/3 for `Key` on `PeerID` until it answers `{ok, _}`
%% (at most 20 attempts).
wait_success_read(Key, PeerID, Attempt) ->
    down_attempt(Attempt,20),
    case zraft_consensus:query(PeerID, Key, ?TIMEOUT) of
        {ok, _} ->
            ok;
        _ ->
            ?debugFmt("Wait read attempt - ~p", [Attempt]),
            wait_success_read(Key, PeerID, Attempt + 1)
    end.

%% Polls stat/1 until `Peer` is a follower in term `Term` whose log has the
%% expected last and commit indexes (at most 30 attempts). `PeerID` is only
%% used in the debug output.
wait_follower_sync(CommitIndex, LastIndex, Term, PeerID, Peer, Attempt) ->
    down_attempt(Attempt,30),
    case zraft_consensus:stat(Peer) of
        #peer_start{term = Term, state_name = follower, log_state = #log_descr{last_index = LastIndex, commit_index = CommitIndex}} ->
            ?debugFmt("Wait start ~p current state[term:~p,last-index:~p,commit:~p,state:~p] attempt - ~p",
                [PeerID, Term, LastIndex, CommitIndex, follower, finished]),
            ok;
        #peer_start{term = T1, state_name = StateName, log_state = #log_descr{last_index = L1, commit_index = C1}} ->
            ?debugFmt("Wait start ~p current state[term:~p,last-index:~p,commit:~p,state:~p] attempt - ~p",
                [PeerID, T1, L1, C1, StateName, Attempt]),
            %% not there yet: back off briefly before polling again
            timer:sleep(100),
            wait_follower_sync(CommitIndex, LastIndex, Term, PeerID, Peer, Attempt + 1);
        _ ->
            ?debugFmt("Wait start ~p attempt - ~p", [PeerID, Attempt]),
            wait_follower_sync(CommitIndex, LastIndex, Term, PeerID, Peer, Attempt + 1)
    end.

%% Polls stat/1 until the peer reports a snapshot taken at index `CommitIndex`
%% (at most 30 attempts).
wait_snapshot_done(CommitIndex, Peer, Attempt) ->
    down_attempt(Attempt,30),
    case zraft_consensus:stat(Peer) of
        #peer_start{snapshot_info = #snapshot_info{index = CommitIndex}} ->
            ok;
        _ ->
            ?debugFmt("Wait snapshot attempt - ~p", [Attempt]),
            wait_snapshot_done(CommitIndex, Peer, Attempt + 1)
    end.


-endif.

--------------------------------------------------------------------------------
/test/session_zraft_client.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%% @author Gunin Alexander
%% Copyright (c) 2015 Gunin Alexander.  All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(session_zraft_client).
-author("dreyk").

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-include("zraft.hrl").


%% Timeout (ms) passed to the synchronous zraft_consensus client calls.
-define(TIMEOUT, 10000).

%% Synchronously sends the `force_timeout` all-state event to the consensus
%% gen_fsm `P`. Callers in this module match the result against `true`.
force_timeout(P) ->
    gen_fsm:sync_send_all_state_event(P, force_timeout).

%% EUnit fixture setup: uses "session-test-data" as the zraft test directory,
%% sets max_log_count to 10 and starts distribution as 'zraft_test@localhost'.
setup_node() ->
    zraft_util:set_test_dir("session-test-data"),
    application:set_env(zraft_lib, max_log_count, 10),
    net_kernel:start(['zraft_test@localhost', shortnames]),
    ok.
%% EUnit fixture cleanup: undoes setup_node/0.
%% The directory cleared here must be the one setup_node/0 created. It used to
%% be "session-test-data1", a name nothing else in this module refers to, so
%% "session-test-data" was left behind after every run.
stop_node(_) ->
    net_kernel:stop(),
    application:unset_env(zraft_lib, max_log_count),
    zraft_util:clear_test_dir("session-test-data"),
    ok.

%% EUnit generator: runs sessions/0 and then session_first/0 inside one
%% setup_node/stop_node fixture, so both share the same data directory.
session_test_() ->
    {
        setup,
        fun setup_node/0,
        fun stop_node/1,
        fun(_X) ->
            [
                sessions(),
                session_first()
            ]
        end
    }.
%% Session scenario (30 s EUnit timeout): builds a three-peer cluster, opens
%% two client sessions and checks writes, queries, watch triggers (on change,
%% on session stop and on leader change) and that requests in flight survive
%% stopping the leader.
sessions() ->
    {timeout,30,fun() ->
        PeerID1 = {test1, node()},
        {ok, Peer1} = zraft_consensus:start_link(PeerID1, zraft_dict_backend),
        PeerID2 = {test2, node()},
        {ok, Peer2} = zraft_consensus:start_link(PeerID2, zraft_dict_backend),
        PeerID3 = {test3, node()},
        {ok, Peer3} = zraft_consensus:start_link(PeerID3, zraft_dict_backend),
        ok = zraft_consensus:initial_bootstrap(Peer1),
        true = force_timeout(Peer1),
        ok = wait_leadership(Peer1,2,1),
        C = zraft_consensus:set_new_configuration(PeerID1, 1, lists:usort([PeerID1, PeerID2, PeerID3]), ?TIMEOUT),
        ?assertMatch(ok, C),
        ok = wait_new_config(4, PeerID1, 1),
        {ok,S1} = zraft_session:start_link(PeerID1,10000),
        {ok,S2} = zraft_session:start_link(PeerID2,10000),
        W1 = zraft_session:write(S1,{1,v1},1000),
        ?assertEqual(ok,W1),
        R1 = zraft_session:query(S2,1,1000),
        ?assertEqual({ok,v1},R1),
        %% query a missing key and register watch `waite1` on it
        R2 = zraft_session:query(S2,2,waite1,1000),
        ?assertEqual(not_found,R2),
        W2 = zraft_session:write(S1,{2,v2},true,1000),
        ?assertEqual(ok,W2),
        %% writing key 2 must fire the first watch
        receive
            T1->
                ?assertEqual(#swatch_trigger{ref = waite1,reason = change},T1),
                ?debugMsg("First trigger ok")
        end,
        R3 = zraft_session:query(S2,2,waite2,1000),
        ?assertEqual({ok,v2},R3),
        %% stopping session S1 must fire the second watch and key 2 must be
        %% gone afterwards
        zraft_session:stop(S1),
        receive
            T2->
                ?assertEqual(#swatch_trigger{ref = waite2,reason = change},T2),
                ?debugMsg("Expire trigger ok")
        end,
        R4 = zraft_session:query(S2,2,waite3,1000),
        ?assertEqual(not_found,R4),
        %% start a write and a query concurrently, then stop the leader
        Me = self(),
        spawn_link(fun()->
            Me ! {w5,(catch zraft_session:write(S2,{3,3},2000))} end),
        spawn_link(fun()->
            Me ! {r5,(catch zraft_session:query(S2,1,2000))} end),
        ok = zraft_consensus:stop(Peer1),
        receive
            T3->
                ?assertEqual(#swatch_trigger{ref = waite3,reason=change_leader},T3),
                ?debugMsg("Leader change ok")
        end,
        receive
            {w5,W5}->
                ?assertEqual(ok,W5)
        end,
        receive
            {r5,R5}->
                ?assertEqual({ok,v1},R5)
        end,
        zraft_session:stop(S2),
        ok = zraft_consensus:stop(Peer2),
        ok = zraft_consensus:stop(Peer3)
    end}.

%% Session opened against a peer list before all peers are up (30 s EUnit
%% timeout): only test1 is running when the session starts; test2 and test3
%% are started one second later. The query expects `{ok,v1}` for key 1.
%% NOTE(review): that value is presumably the one written by sessions/0 and
%% kept on disk, since both tests share one fixture - confirm.
session_first() ->
    {timeout,30,fun() ->
        PeerID1 = {test1, node()},
        PeerID2 = {test2, node()},
        PeerID3 = {test3, node()},
        {ok, Peer1} = zraft_consensus:start_link(PeerID1, zraft_dict_backend),
        wait_start(Peer1,1),
        {ok,S1} = zraft_session:start_link([PeerID1,PeerID2,PeerID3],10000),
        timer:sleep(1000),
        {ok, Peer2} = zraft_consensus:start_link(PeerID2, zraft_dict_backend),
        {ok, Peer3} = zraft_consensus:start_link(PeerID3, zraft_dict_backend),
        {L,_} = wait_leader(Peer1,1),
        ?debugFmt("Leader is ~p",[L]),
        R1 = zraft_session:query(S1,1,10000),
        ?assertEqual({ok,v1},R1),
        zraft_session:stop(S1),
        ok = zraft_consensus:stop(Peer1),
        ok = zraft_consensus:stop(Peer2),
        ok = zraft_consensus:stop(Peer3)
    end}.

%% Retry budget check used by the wait_* pollers.
%% Returns `ok` while `Attempt` is still below `Max`; otherwise exits the test
%% process with `{error,to_many_attempts}`.
down_attempt(Attempt,Max) when Attempt < Max ->
    ok;
down_attempt(_Attempt,_Max)->
    exit({error,to_many_attempts}).
%% Polls stat/1 until `Peer` is leader with commit index `Commit`
%% (at most 20 attempts, counted from the given `Attempt`).
wait_leadership(Peer,Commit, Attempt) ->
    down_attempt(Attempt,20),
    case zraft_consensus:stat(Peer) of
        #peer_start{state_name = leader,log_state = #log_descr{commit_index = Commit}} ->
            ok;
        #peer_start{} ->
            ?debugFmt("Wait leadership attempt - ~p",[Attempt]),
            wait_leadership(Peer,Commit, Attempt + 1)
    end.
%% Polls stat/1 every 500 ms while the peer still reports state `load`;
%% returns `ok` on any other answer (at most 20 attempts).
wait_start(Peer, Attempt) ->
    timer:sleep(500),
    down_attempt(Attempt,20),
    case zraft_consensus:stat(Peer) of
        #peer_start{state_name = load} ->
            wait_start(Peer, Attempt + 1);
        _->
            ok
    end.
%% Polls stat/1 every 500 ms until `Peer` either is the leader or is a
%% follower that knows a leader; returns `{Leader, Term}` as reported.
wait_leader(Peer, Attempt) ->
    timer:sleep(500),
    down_attempt(Attempt,20),
    case zraft_consensus:stat(Peer) of
        #peer_start{state_name = leader, leader = L, term = T} ->
            {L, T};
        #peer_start{state_name = follower, leader = L, term = T} when L /= undefined ->
            {L, T};
        #peer_start{state_name = S, leader = L} ->
            ?debugFmt("Current state ~s,leader is ~p. Make leader attempt - ~p", [S, L, Attempt]),
            wait_leader(Peer, Attempt + 1)
    end.

%% Polls get_conf/2 until the configuration reported for `PeerID` has index
%% `Index` (at most 30 attempts).
wait_new_config(Index, PeerID, Attempt) ->
    down_attempt(Attempt,30),
    case zraft_consensus:get_conf(PeerID, ?TIMEOUT) of
        {ok, {Index, _}} ->
            ok;
        _ ->
            ?debugFmt("Wait config attempt - ~p", [Attempt]),
            wait_new_config(Index, PeerID, Attempt + 1)
    end.

-endif.


--------------------------------------------------------------------------------
/tools.mk:
--------------------------------------------------------------------------------
# rebar executable; override with `make REBAR=...`.
REBAR ?= ./rebar

# Run the EUnit suites of this project only (dependencies skipped).
test: compile
	${REBAR} eunit skip_deps=true

# Generate documentation for this project only.
docs:
	${REBAR} doc skip_deps=true

# Cross-reference check for this project only.
xref: compile
	${REBAR} xref skip_deps=true

# PLT holding the apps listed in DIALYZER_APPS (set by the including Makefile),
# and a second PLT built from deps/*/ebin.
PLT ?= $(PWD)/.combo_dialyzer_plt
LOCAL_PLT = $(PWD)/.local_dialyzer_plt
DIALYZER_FLAGS ?= -Wunmatched_returns -Werror_handling -Wrace_conditions -Wunderspecs

# Build the apps PLT, or check and extend it when it already exists.
# `test $$? -ne 1` makes the recipe fail only when dialyzer exits with 1.
${PLT}: compile
	@if [ -f $(PLT) ]; then \
		dialyzer --check_plt --plt $(PLT) --apps $(DIALYZER_APPS) && \
		dialyzer --add_to_plt --plt $(PLT) --output_plt $(PLT) --apps $(DIALYZER_APPS) ; test $$? -ne 1; \
	else \
		dialyzer --build_plt --output_plt $(PLT) --apps $(DIALYZER_APPS); test $$? -ne 1; \
	fi

# Same for the dependencies PLT; does nothing when there is no deps directory.
${LOCAL_PLT}: compile
	@if [ -d deps ]; then \
		if [ -f $(LOCAL_PLT) ]; then \
			dialyzer --check_plt --plt $(LOCAL_PLT) deps/*/ebin && \
			dialyzer --add_to_plt --plt $(LOCAL_PLT) --output_plt $(LOCAL_PLT) deps/*/ebin ; test $$? -ne 1; \
		else \
			dialyzer --build_plt --output_plt $(LOCAL_PLT) deps/*/ebin ; test $$? -ne 1; \
		fi \
	fi

# Run dialyzer over ebin. When dialyzer.ignore-warnings exists, warnings
# matching any of its lines are filtered out and the target fails only if
# unhandled warnings remain. A blank line in that file is rejected up front.
dialyzer: ${PLT} ${LOCAL_PLT}
	@echo "==> $(shell basename $(shell pwd)) (dialyzer)"
	@if [ -f $(LOCAL_PLT) ]; then \
		PLTS="$(PLT) $(LOCAL_PLT)"; \
	else \
		PLTS=$(PLT); \
	fi; \
	if [ -f dialyzer.ignore-warnings ]; then \
		if [ $$(grep -cvE '[^[:space:]]' dialyzer.ignore-warnings) -ne 0 ]; then \
			echo "ERROR: dialyzer.ignore-warnings contains a blank/empty line, this will match all messages!"; \
			exit 1; \
		fi; \
		dialyzer $(DIALYZER_FLAGS) --plts $${PLTS} -c ebin > dialyzer_warnings ; \
		egrep -v "^[[:space:]]*(done|Checking|Proceeding|Compiling)" dialyzer_warnings | grep -F -f dialyzer.ignore-warnings -v > dialyzer_unhandled_warnings ; \
		cat dialyzer_unhandled_warnings ; \
		[ $$(cat dialyzer_unhandled_warnings | wc -l) -eq 0 ] ; \
	else \
		dialyzer $(DIALYZER_FLAGS) --plts $${PLTS} -c ebin; \
	fi

# Delete both PLT files after a 5 second grace period.
cleanplt:
	@echo
	@echo "Are you sure? It takes several minutes to re-build."
	@echo Deleting $(PLT) and $(LOCAL_PLT) in 5 seconds.
	@echo
	sleep 5
	rm $(PLT)
	rm $(LOCAL_PLT)

--------------------------------------------------------------------------------