├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── include └── eavro.hrl ├── priv └── avro_ocf.avsc ├── rebar.config ├── rebar.lock ├── src ├── eavro.app.src ├── eavro.erl ├── eavro_codec.erl ├── eavro_ocf_codec.erl ├── eavro_ocf_zcodec.erl ├── eavro_rpc_fsm.erl ├── eavro_rpc_handler.erl ├── eavro_rpc_proto.erl ├── eavro_rpc_srv.erl └── eavro_zcodec.erl └── test ├── data ├── eavro-rpc-test-servers-1.7.5-SNAPSHOT-jar-with-dependencies.jar ├── flume.avdl ├── flume.avpr ├── issue_11.avsc ├── mail.avpr ├── transformer.avsc ├── transformers-deflated.avro ├── transformers-deflated2.avro ├── transformers.avro ├── transformers.json ├── twitter.avro └── twitter.avsc ├── eavro_codec_tests.erl ├── eavro_ocf_tests.erl ├── eavro_rpc_proto_tests.erl ├── eavro_rpc_test_email_handler.erl ├── eavro_schema_tests.erl └── eavro_zcodec_tests.erl /.gitignore: -------------------------------------------------------------------------------- 1 | ebin 2 | deps 3 | .eunit 4 | /avro_tools/ 5 | eavro.plt 6 | .rebar 7 | _build 8 | rebar3.crashdump 9 | test/transformers* 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: erlang 2 | otp_release: 3 | - R16B01 4 | - R16B02 5 | - R16B03-1 6 | - R16B 7 | - 17.1 8 | - 17.0 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Apache Avro encoder/decoder 2 | =========================== 3 | 4 | [![Build Status](https://secure.travis-ci.org/SIfoxDevTeam/eavro.png)](https://travis-ci.org/SIfoxDevTeam/eavro) 5 | 6 | ## Supported primitives 7 | 8 | * null: no value 9 | * boolean: a binary value 10 | * int: 32-bit signed integer 11 | * long: 64-bit signed integer 12 | * float: single precision (32-bit) IEEE 754 floating-point number 13 | * double: double precision (64-bit) IEEE 754 floating-point number 14 | * bytes: sequence of 8-bit unsigned bytes 15 | * string: unicode character sequence 16 | 17 | ## Supported complex types 18 | * records 19 | * enum 20 | * map 21 | * fixed 22 | * union 23 | * array 24 | 25 | ## Usage 26 | 27 | ### Encoding/decoding 28 | 29 | Let file `schema.avsc` contain: 30 | ```json 31 | { 32 | "type": "record", 33 | "name": "User", 34 | "fields" : [ 35 | {"name": "username", "type": "string"}, 36 | {"name": "age", "type": "int"}, 37 | {"name": 
"verified", "type": "boolean", "default": "false"} 38 | ] 39 | } 40 | ``` 41 | Then, encode record according to the schema above as: 42 | 43 | ```erlang 44 | 45 | Erlang R16B01 (erts-5.10.2) [source] [64-bit] [smp:2:2] [async-threads:10] [kernel-poll:false] 46 | 47 | Eshell V5.10.2 (abort with ^G) 48 | 1> Schema = eavro:read_schema("schema.avsc"). 49 | {avro_record,<<"User">>, 50 | [{<<"username">>,string}, 51 | {<<"age">>,int}, 52 | {<<"verified">>,boolean}]} 53 | 2> eavro:encode(Schema, [<<"John">>, 23, true]). 54 | <<8,74,111,104,110,46,1>> 55 | ``` 56 | Encode value of union type require explicit type specification when encoding: 57 | 58 | ```erlang 59 | 1> rr(eavro). 60 | [avro_array,avro_enum,avro_fixed,avro_map,avro_record] 61 | 2> eavro:encode([int, string], {int, 1}). 62 | <<0,2>> 63 | 3> eavro:encode([int, string], {string, <<"blah">>}). 64 | <<2,8,98,108,97,104>> 65 | 4> eavro:encode(#avro_array{ items = [int, string] }, [{int, 1}, {string, <<"blah">>}]). 66 | <<4,0,2,2,8,98,108,97,104,0>> 67 | 5> eavro:decode(#avro_array{ items = [int, string] }, <<4,0,2,2,8,98,108,97,104,0>>). 68 | {[[1,<<"blah">>]],<<>>} 69 | 6> RecType = #avro_record{ name = some_struct, fields = [{field1, int}] }. 70 | #avro_record{name = some_struct,fields = [{field1,int}]} 71 | 7> eavro:encode(#avro_array{ items = [int, string, RecType] }, [{int, 1}, {string, <<"blah">>}, {RecType, [37337] }]). 72 | <<6,0,2,2,8,98,108,97,104,4,178,199,4,0>> 73 | 8> eavro:decode(#avro_array{ items = [int, string, RecType] }, <<6,0,2,2,8,98,108,97,104,4,178,199,4,0>>). 74 | {[[1,<<"blah">>,[37337]]],<<>>} 75 | ``` 76 | ### Object Container Files 77 | 78 | Read data from Avro binary file in an OCF format: 79 | ```erlang 80 | 2> rr(eavro). 81 | [avro_enum,avro_fixed,avro_map,avro_record] 82 | 3> rp(eavro:read_ocf("test/data/transformers.avro")). 
83 | {#avro_record{name = transformer_schema, 84 | fields = [{<<"fname">>,string}, 85 | {<<"lname">>,string}, 86 | {<<"age">>,int}, 87 | {<<"is_autobot">>,boolean}, 88 | {<<"location">>, 89 | #avro_enum{name = 'Location', 90 | symbols = ['Earth','Moon','March','Venus','Jupiter', 91 | 'Mercury','Titan','Io','Europe','Ganimed','Callisto', 92 | 'Pluton']}}, 93 | {<<"equipment">>, 94 | #avro_map{values = #avro_record{name = 'Equipment', 95 | fields = [{<<"name">>,string},{<<"weight">>,int}]}}}]}, 96 | [[[<<"Optimus">>,<<"Prime">>,1000,true,'Earth', 97 | [[{<<"weapon">>,[<<"SuperBlaster">>,33]}]]], 98 | [<<"Nexus">>,<<"Prime">>,1001,true,'Moon', 99 | [[{<<"weapon">>,[<<"PlasmaCanon">>,100]}]]], 100 | [<<"Zeta">>,<<"Prime">>,2000,true,'March', 101 | [[{<<"weapon">>,[<<"LazerCanon">>,60]}]]], 102 | [<<"Rodmus">>,<<"Prime">>,1000,true,'Venus', 103 | [[{<<"weapon">>,[<<"RocketLauncher">>,200]}]]], 104 | [<<"Optimus1">>,<<"Prime">>,1000,true,'Jupiter', 105 | [[{<<"weapon">>,[<<"Blaster">>,33]}]]], 106 | [<<"Nexus1">>,<<"Prime">>,1001,true,'Mercury', 107 | [[{<<"weapon">>,[<<"Blaster">>,33]}]]], 108 | [<<"Zeta1">>,<<"Prime">>,2000,true,'Titan', 109 | [[{<<"weapon">>,[<<"Blaster">>,33]}]]], 110 | [<<"Rodmus1">>,<<"Prime">>,1000,true,'Io', 111 | [[{<<"weapon">>,[<<"Blaster">>,33]}]]], 112 | [<<"Optimus2">>,<<"Prime">>,1000,true,'Europe', 113 | [[{<<"weapon">>,[<<"Blaster">>,33]}]]], 114 | [<<"Nexus2">>,<<"Prime">>,1001,true,'Ganimed', 115 | [[{<<"weapon">>,[<<"Blaster">>,33]}]]], 116 | [<<"Zeta3">>,<<"Prime">>,2000,true,'Callisto', 117 | [[{<<"weapon">>,[<<"NuclearGun">>,70]}]]], 118 | [<<"Rodmus3">>,<<"Prime">>,1000,true,'Pluton', 119 | [[{<<"weapon">>,[<<"ElectroHammer">>,180]}]]]]]} 120 | ok 121 | ``` 122 | Please note how data is returned: 123 | * the first element of a binary tuple is a schema extracted from OCF header 124 | * the second element contains a list of blocks, where each block is a list on schema instances - in our case these are records whose data represented 
as a list of values, which is why we see a deep list structure in the result. 125 | 126 | It would be easy to remove such a deep list structure, i.e. the block lists, but that would require the '++' operator, which is bad for performance, so it was decided to keep the block structure in the result. 127 | 128 | The same reasoning applies to the result of decoding a 'map' type. 129 | 130 | 131 | Read data from Avro binary file in an OCF format using eavro_ocf_zcodec: 132 | 133 | ```erlang 134 | 1> rr(eavro). 135 | [avro_array,avro_enum,avro_fixed,avro_map,avro_record] 136 | 2> eavro_ocf_zcodec:read_ocf_with( 137 | 2> "test/data/transformers-deflated.avro", 138 | 2> fun(Schema, ZInstances) -> 139 | 2> ZInstances 140 | 2> end). 141 | [[<<"0000">>,<<"Optimus">>,<<"Prime">>,1000,true,'Earth', 142 | [[{<<"weapon">>,[<<"SuperBlaster">>,33]}]], 143 | [[<<"0001">>,<<"0002">>]], 144 | 234]| 145 | #Fun] 146 | 3> eavro_ocf_zcodec:read_ocf_with( 147 | 3> "test/data/transformers-deflated.avro", 148 | 3> fun(Schema, ZInstances) -> 149 | 3> zlists:count(ZInstances) 150 | 3> end). 151 | 12 152 | 4> eavro_ocf_zcodec:read_ocf_with( 153 | 4> "test/data/transformers-deflated.avro", 154 | 4> fun(Schema, ZInstances) -> 155 | 4> zlists:expand(5, 156 | 4> zlists:map(fun(Inst) -> hd(Inst) end, ZInstances)) 157 | 4> end). 158 | [<<"0000">>,<<"0001">>,<<"0002">>,<<"0003">>,<<"0004">>| 159 | #Fun] 160 | ``` 161 | The function 'eavro_ocf_zcodec:read_ocf_with' provides a memory-efficient way to read huge Avro OCF files. Currently only the 'deflate' compression codec is supported (snappy TBD). 162 | 163 | Writing OCFs: 164 | 165 | ```erlang 166 | 1> rr(eavro). 167 | [avro_array,avro_enum,avro_fixed,avro_map,avro_record] 168 | 2> Schema = eavro:read_schema("test/data/twitter.avsc"). 
169 | #avro_record{name = twitter_schema, 170 | fields = [{<<"username">>,string}, 171 | {<<"tweet">>,string}, 172 | {<<"timestamp">>,long}]} 173 | 3> eavro:write_ocf("data.avro", Schema, [ [<<"Optimus">>, <<"Prime">>, 134234132], [<<"Nexus">>, <<"Prime">>, 3462547657] ]). 174 | ok 175 | 4> eavro:read_ocf_with("data.avro", fun(_Schema, ZInstances) -> zlists:expand(ZInstances) end ). 176 | [[<<"Optimus">>,<<"Prime">>,134234132], 177 | [<<"Nexus">>,<<"Prime">>,3462547657]] 178 | ``` 179 | ### Avro Protocol 180 | #### Client 181 | Making Avro RPC calls: 182 | 183 | ```erlang 184 | 1> {ok, P} = eavro_rpc_fsm:start_link("localhost", 41414, "flume.avpr"). 185 | {ok,<0.35.0>} 186 | 2> eavro_rpc_fsm:call(P, append, _Args = [ _Rec = [ [], <<"HELLO">> ] ]). 187 | {ok,'OK'} 188 | ``` 189 | 190 | To make Avro RPC calls you need an Avro protocol file in JSON format 191 | (usually an *.avpr file). If you only have an Avro IDL file (usually an *.avdl file), 192 | for now use the Avro tools jar to do the `.avdl -> .avpr` conversion: 193 | 194 | ```bash 195 | $ mkdir avro_tools 196 | $ (cd avro_tools && wget http://apache-mirror.rbc.ru/pub/apache/avro/avro-1.7.7/java/avro-tools-1.7.7.jar) 197 | $ java -jar avro_tools/avro-tools-1.7.7.jar idl test/data/flume.avdl | python -mjson.tool > flume.avpr 198 | ``` 199 | #### Server 200 | To implement an Avro RPC server in Erlang, implement the `eavro_rpc_handler` behaviour. Then start it as follows: 201 | ```erlang 202 | eavro_rpc_srv:start(your_rpc_handler,_InitArgs = [], _Port = 2525, _PoolSize = 1). 203 | ``` 204 | The server framework is implemented using the Ranch application. 205 | ##### RPC handler example 206 | ```erlang 207 | -module(my_email_handler). 208 | 209 | -behaviour(eavro_rpc_handler). 210 | -include("eavro.hrl"). 211 | 212 | -export([get_protocol/0, 213 | init/1, 214 | handle_call/2]). 215 | 216 | -record(state, { }). 
217 | 218 | get_protocol() -> eavro_rpc_proto:parse_protocol_file("mail.avpr"). 219 | 220 | init([]) -> 221 | {ok, #state{} }. 222 | 223 | handle_call( {#avro_message{ name = <<"send">> }, 224 | [ Record = [_From, _To, Body] ] = _Args}, 225 | #state{} = _State ) -> 226 | io:format("Body '~s` sent!", [Record]), 227 | {ok, "Ok"}. 228 | ``` 229 | 230 | ### Working with Kafka 231 | 232 | #### Encode 233 | 234 | To include the schema id in an encoded message before sending it to Kafka, prepend the encoded message with a 'magic byte' and the schema id as 4 bytes: 235 | ``` 236 | MessageToKafka = <<0, SchemaId:32, EncodedMessage/binary>>. 237 | ``` 238 | 239 | #### Decode 240 | 241 | To decode a message which contains a schema id, skip the first 5 bytes (the 'magic byte' and the 4-byte schema id) before decoding it: 242 | ``` 243 | <<0, _SchemaId:32, EncodedMessage/binary>> = MessageFromKafka. 244 | eavro:decode(Schema, EncodedMessage). 245 | ``` 246 | 247 | 248 | ## TODO 249 | 250 | * Add specs, tests and documentation 251 | * Support codecs (snappy) when reading and writing data from OCF 252 | 253 | ## License 254 | 255 | All parts of this software are distributed under the Apache License, Version 2.0 terms. 256 | -------------------------------------------------------------------------------- /include/eavro.hrl: -------------------------------------------------------------------------------- 1 | %% 2 | %% Avro Schema Types 3 | %% 4 | -record(avro_record, { 5 | name :: atom(), 6 | fields :: [{atom(), avro_type()}] 7 | }). 8 | 9 | -record(avro_enum, { name :: atom(), symbols :: [ atom() ] }). 10 | 11 | -record(avro_fixed, { name :: atom(), size :: integer() }). 12 | 13 | -record(avro_map, { values :: avro_type() }). 14 | 15 | -record(avro_array, { items :: avro_type() }). 16 | 17 | -type avro_type() :: #avro_record{} | 18 | #avro_enum{} | 19 | int | 20 | long | 21 | double | 22 | bytes | 23 | boolean | 24 | string | 25 | null . 26 | -type decode_hook() :: fun( (avro_type(), any() ) -> any() ). 
27 | 28 | %% 29 | %% Avro Protocol Records 30 | %% 31 | 32 | -record(avro_message, 33 | { name :: binary(), 34 | args :: [avro_type()], 35 | return :: avro_type()}). 36 | 37 | -record(avro_proto, 38 | { ns :: binary(), 39 | name :: binary(), 40 | types :: avro_type(), 41 | messages :: #avro_message{}, 42 | json :: binary()}). 43 | -------------------------------------------------------------------------------- /priv/avro_ocf.avsc: -------------------------------------------------------------------------------- 1 | {"type": "record", "name": "org.apache.avro.file.Header", 2 | "fields" : [ 3 | {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, 4 | {"name": "meta", "type": {"type": "map", "values": "bytes"}}, 5 | {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} 6 | ] 7 | } -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {deps, [ 2 | {jsx, "2.8.2"}, 3 | {zlists, "0.0.4"}, 4 | {ranch, "1.3.2"} 5 | ]}. 6 | 7 | {profiles, [ 8 | {test, [ 9 | {cover_enabled, true}, 10 | {cover_opts, [verbose]}, 11 | {eunit_opts, [verbose, {{eunit_surefire, [{dir, "."}]}}]} 12 | ]} 13 | ]}. 14 | 15 | {xref_checks,[ 16 | undefined_function_calls, 17 | undefined_functions, 18 | locals_not_used 19 | ]}. 20 | 21 | {provider_hooks, [{post, [{compile, xref}]}]}. 22 | -------------------------------------------------------------------------------- /rebar.lock: -------------------------------------------------------------------------------- 1 | {"1.1.0", 2 | [{<<"jsx">>,{pkg,<<"jsx">>,<<"2.8.2">>},0}, 3 | {<<"ranch">>,{pkg,<<"ranch">>,<<"1.3.2">>},0}, 4 | {<<"zlists">>,{pkg,<<"zlists">>,<<"0.0.4">>},0}]}. 
5 | [ 6 | {pkg_hash,[ 7 | {<<"jsx">>, <<"7ACC7D785B5ABE8A6E9ADBDE926A24E481F29956DD8B4DF49E3E4E7BCC92A018">>}, 8 | {<<"ranch">>, <<"E4965A144DC9FBE70E5C077C65E73C57165416A901BD02EA899CFD95AA890986">>}, 9 | {<<"zlists">>, <<"F641FCD2D05DCC34E6F98A0A88290EC54FD269246D2BED41D9504361F243A442">>}]} 10 | ]. 11 | -------------------------------------------------------------------------------- /src/eavro.app.src: -------------------------------------------------------------------------------- 1 | {application, eavro, [ 2 | {description, "Apache Avro encoder/decoder and RPC protocol"}, 3 | {vsn, "0.0.5"}, 4 | {applications, [kernel, stdlib, jsx, zlists, ranch]}, 5 | {maintainers, ["Artem Teslenko", "Vyacheslav Vorobyov"]}, 6 | {licenses, ["Apache"]}, 7 | {links,[ 8 | {"Github", "https://github.com/SIfoxDevTeam/eavro"} 9 | ]} 10 | ]}. 11 | -------------------------------------------------------------------------------- /src/eavro.erl: -------------------------------------------------------------------------------- 1 | -module(eavro). 2 | 3 | %% API exports 4 | -export([read_ocf/1, 5 | read_ocf/2, 6 | read_ocf_with/2, 7 | read_ocf_with/3, 8 | read_schema/1, 9 | write_ocf/3, 10 | write_ocf/4, 11 | parse_schema/1, 12 | parse_type/2, 13 | parse_types/2, 14 | encode_schema/1, 15 | encode/2, 16 | decode/2, 17 | decode/3]). 18 | 19 | -export([ type_to_jsx/1 ]). 20 | 21 | -include("eavro.hrl"). 22 | 23 | %%======================= 24 | %% API functions 25 | %%====================== 26 | 27 | -type type_context() :: dict:dict(atom(), avro_type()). 28 | 29 | %% 30 | %% 31 | %% 32 | -spec read_ocf(Filename :: file:filename()) -> 33 | {Schema :: avro_type(), 34 | Blocks :: [ [ any() ] ]}. 35 | read_ocf(File) -> 36 | read_ocf(File, undefined). 37 | 38 | %% 39 | %% 40 | %% 41 | -spec read_ocf(File :: file:filename(), 42 | Hook :: undefined | decode_hook()) -> 43 | {Schema :: avro_type(), 44 | Blocks :: [ [ any() ] ]}. 
read_ocf(File, Hook) ->
    %% Assertive match: an unreadable file crashes here with badmatch
    %% instead of returning an error tuple.
    {ok, Bin} = file:read_file(File),
    eavro_ocf_codec:decode(Bin, Hook).

%%
%% Read an Avro schema (JSON text) from File and parse it.
%% If the file cannot be read, the `{error, Reason}' tuple produced by
%% file:read_file/1 is returned unchanged, so callers must be prepared
%% for both shapes.
%%
-spec read_schema(File :: file:filename()) ->
    avro_type() | {error, file:posix() | badarg | terminated | system_limit}.
read_schema(File) ->
    case file:read_file(File) of
        {ok, Data} ->
            parse_schema(jsx:decode(Data));
        {error, _Reason} = Error ->
            Error
    end.

%%
%% Read OCF using callback function which accepts two
%% arguments - schema and Z-List of instances. Function
%% returns the result of the callback. No decode hook is
%% installed (the hook argument is passed as 'undefined').
%%
-spec read_ocf_with(
    File :: file:filename(),
    Visitor :: eavro_ocf_zcodec:ocf_visitor(Result)
) -> Result.
read_ocf_with(File, Visitor) ->
    read_ocf_with(File, Visitor, undefined).

%%
%% Read OCF using callback function which accepts two
%% arguments - schema and Z-List of instances, and a
%% decode hook callback to transform instances just
%% after they are decoded in the default way. Function
%% returns the result of the callback.
%% Hook may be 'undefined' (this is what read_ocf_with/2 passes).
%%
-spec read_ocf_with(
    File :: file:filename(),
    Visitor :: eavro_ocf_zcodec:ocf_visitor(Result),
    Hook :: undefined | decode_hook()
) -> Result.
read_ocf_with(File, Visitor, Hook) ->
    eavro_ocf_zcodec:read_ocf_with(File, Visitor, Hook).

%%
%% Write OCF with given schema and instances, using default
%% options (an empty option list).
%%
-spec write_ocf(
    Filename :: file:filename(),
    Schema :: avro_type(),
    ZInstances :: zlists:zlist()
) -> ok.
write_ocf(Filename, Schema, ZInstances) ->
    write_ocf(Filename, Schema, ZInstances, []).

%%
%% Write OCF with given schema, instances, and options which
%% control binary format details such as compression codec
%% used (currently only 'deflate' and 'plain' supported), and
%% size of block.
%% If a block size is given it is a threshold, not an exact size: once
%% the bytes written into a block exceed it, a new block is started.
%%
-spec write_ocf(
    Filename :: file:filename(),
    Schema :: avro_type(),
    ZInstances :: zlists:zlist(),
    Opts :: [
        {codec, deflate | plain}
        | {block_size, non_neg_integer()}
    ]
) -> ok.
write_ocf(Filename, Schema, ZInstances, Opts) ->
    eavro_ocf_zcodec:write_ocf_file(Filename, Schema, ZInstances, Opts).

%%
%% Parse a schema given either as JSON text (a binary, decoded with
%% jsx:decode/1 first) or as an already decoded jsx term.
%% Parsing starts with an empty type context; the context built while
%% parsing is discarded and only the resulting type is returned.
%%
-spec parse_schema(binary() | jsx:json_term()) -> avro_type().
parse_schema(SchemaJson) when is_binary(SchemaJson) ->
    parse_schema(jsx:decode(SchemaJson));
parse_schema(SchemaJsx) ->
    {Type, _Ctx} = parse_type(SchemaJsx, dict:new()),
    Type.

%%
%% Encode schema as JSON.
%%
-spec encode_schema(Schema :: avro_type()) -> binary().
encode_schema(Schema) ->
    jsx:encode(type_to_jsx(Schema)).

%%
%% Decode one value of type Schema from Buff without a decode hook
%% (equivalent to decode(Schema, Buff, undefined)).
%%
-spec decode(
    Schema :: avro_type(),
    Buff :: binary() | iolist()
) ->
    {Value :: term(), Rest :: binary()}.
decode(Schema, Buff) ->
    decode(Schema, Buff, undefined).

%%
%% Decode one value of type Schema from Buff, delegating to
%% eavro_codec:decode/3. Returns the decoded value together with the
%% remaining buffer.
%%
-spec decode(
    Schema :: avro_type(),
    Buff :: binary() | iolist(),
    Hook :: undefined | decode_hook()
) ->
    {Value :: term(), Buff :: binary()}.
decode(Schema, Buff, Hook) ->
    eavro_codec:decode(Schema, Buff, Hook).

%%
%% Encode Data according to Schema. The output of eavro_codec:encode/2
%% is flattened into a single binary.
%%
encode(Schema, Data) ->
    iolist_to_binary(eavro_codec:encode(Schema, Data)).
%%
%% Private functions section
%%

%%
%% Convert a parsed Avro type into a jsx term (proplists with atom
%% keys and binary values) suitable for jsx:encode/1.
%%
%% Names are converted with to_bin/1, which is not visible in this
%% part of the file (presumably it turns atom/binary names into
%% binaries - confirm against its definition).
%%
%% A union is a non-empty list of types and may start with any kind
%% of branch (primitive atom or complex record type).
%% Exits with {bad_simple_type, Name} for an atom or binary that is
%% not one of the Avro primitive type names.
%%
type_to_jsx(#avro_record{name = Name, fields = Fields}) ->
    [
        {type, <<"record">>},
        {name, to_bin(Name)},
        {fields, [
            [{name, to_bin(FName)}, {type, type_to_jsx(FType)}]
         || {FName, FType} <- Fields
        ]}
    ];
type_to_jsx(#avro_enum{name = Name, symbols = Symbols}) ->
    [
        {type, <<"enum">>},
        {name, to_bin(Name)},
        {symbols, [to_bin(Symbol) || Symbol <- Symbols]}
    ];
type_to_jsx(#avro_fixed{name = Name, size = Size}) ->
    [{type, <<"fixed">>}, {name, to_bin(Name)}, {size, Size}];
type_to_jsx(#avro_map{values = VType}) ->
    [{type, <<"map">>}, {values, type_to_jsx(VType)}];
type_to_jsx(#avro_array{items = IType}) ->
    [{type, <<"array">>}, {items, type_to_jsx(IType)}];
type_to_jsx([_ | _] = Union) ->
    %% Matching on the list shape (rather than on the first element
    %% being an atom) lets unions whose first branch is a record, enum,
    %% fixed, map or array be serialized as well.
    [type_to_jsx(T) || T <- Union];
type_to_jsx(A) when is_atom(A) ->
    type_to_jsx(atom_to_binary(A, latin1));
type_to_jsx(B) when is_binary(B) ->
    Primitives = [
        <<"null">>,
        <<"boolean">>,
        <<"int">>,
        <<"long">>,
        <<"double">>,
        <<"string">>,
        <<"bytes">>,
        <<"float">>
    ],
    case lists:member(B, Primitives) of
        true -> B;
        false -> exit({bad_simple_type, B})
    end.

%%
%% Parse a list of jsx-decoded types from left to right, threading the
%% type context through so that later types can refer to named types
%% defined by earlier ones. Returns {ParsedTypes, FinalContext}.
%%
parse_types(Types, Context) ->
    lists:mapfoldl(fun parse_type/2, Context, Types).

-spec parse_type(
    Jsx :: jsx:json_term(),
    Context :: type_context()
) ->
    {avro_type(), type_context()}.
%% Parses one jsx-decoded type definition:
%%  - a binary is a primitive type name or a reference to a named
%%    type already stored in Context;
%%  - a proplist (JSON object) is a complex type selected by "type";
%%  - a list starting with a binary or with a proplist is a union.
%% Anything else exits with {badarg, Term}.
parse_type(Simple, Context) when is_binary(Simple) ->
    Type =
        case Simple of
            <<"null">>    -> null;
            <<"boolean">> -> boolean;
            <<"int">>     -> int;
            <<"long">>    -> long;
            <<"double">>  -> double;
            <<"string">>  -> string;
            <<"bytes">>   -> bytes;
            <<"float">>   -> float;
            Alias         -> resolve_named_type(Alias, Context)
        end,
    {Type, Context};
parse_type([{_,_}|_] = Complex, Context) ->
    Parser =
        case proplists:get_value(<<"type">>, Complex) of
            <<"record">> -> fun parse_record/2;
            <<"enum">>   -> fun parse_enum/2;
            <<"map">>    -> fun parse_map/2;
            <<"array">>  -> fun parse_array/2;
            <<"fixed">>  -> fun parse_fixed/2;
            BadType      -> exit({bad_complex_type, BadType})
        end,
    Parser(Complex, Context);
parse_type([B|_] = Union, Context) when is_binary(B) ->
    parse_union(Union, Context);
parse_type([ [{B,_}|_] = _ComplexType |_] = Union, Context) when is_binary(B) ->
    parse_union(Union, Context);
parse_type(Bad, _Context) ->
    exit({badarg, Bad}).

%% Looks up a previously parsed named type (record, enum or fixed).
%%
%% Context keys are atoms created when the named type was parsed, so a
%% valid reference always maps to an existing atom. Using
%% binary_to_existing_atom/2 avoids minting a new atom for every
%% unknown name found in schema text; atoms are never garbage
%% collected. An unknown name exits with the same reason as before.
resolve_named_type(Name, Context) ->
    try binary_to_existing_atom(Name, latin1) of
        AName ->
            case dict:find(AName, Context) of
                {ok, T} -> T;
                error   -> exit({bad_simple_type_or_alias, Name, Context})
            end
    catch
        error:badarg ->
            exit({bad_simple_type_or_alias, Name, Context})
    end.

%% Returns the values of Attrs, in order, from the proplist Complex;
%% a missing attribute yields 'undefined'.
get_attributes(Complex, Attrs) ->
    [proplists:get_value(Attr, Complex) || Attr <- Attrs].

%% Converts a type or symbol name to an atom.
binary_to_latin1_atom(Bin) ->
    binary_to_atom(Bin, latin1).

%% Parses a "record" definition. Fields are parsed in order with the
%% context threaded through; the finished record is then registered in
%% the context under its name.
parse_record(Record, Context) ->
    [Name, Fields] = get_attributes(Record, [<<"name">>, <<"fields">>]),
    {FieldsParsed, Context1} =
        lists:mapfoldl(fun parse_field/2, Context, Fields),
    AName = binary_to_latin1_atom(Name), %% From Avro spec.: [A-Za-z0-9_]
    RecTypeParsed =
        #avro_record{ name   = AName,
                      fields = FieldsParsed},
    {RecTypeParsed,
     dict:store(AName, RecTypeParsed, Context1)}.

%% Parses one record field into {Name, ParsedType}; the field name is
%% kept as the binary found in the schema.
parse_field(RecField, Context) ->
    [Name, Type] = get_attributes(RecField, [<<"name">>, <<"type">>] ),
    {TypeParsed, Context1} = parse_type(Type, Context),
    {{Name, TypeParsed}, Context1}.

%% Parses an "enum" definition and registers it in the context.
parse_enum(Enum, Context) ->
    [Name, Symbols] = get_attributes(Enum, [<<"name">>, <<"symbols">>]),
    AName = binary_to_latin1_atom(Name), %% From Avro spec.: [A-Za-z0-9_]
    TypeParsed = #avro_enum{ name    = AName,
                             symbols = lists:map(fun binary_to_latin1_atom/1, Symbols) },
    {TypeParsed,
     dict:store(AName, TypeParsed, Context)}.

%% Parses every branch of a union and rejects unions with clashing
%% branches.
parse_union(Union, Context) ->
    {Types, Context1} = parse_types(Union, Context),
    check_uniqueness(Types),
    {Types, Context1}.

%% Exits with {bad_union, Types, {name_clash, {Idx1, T1}, {Idx2, T2}}}
%% when two branches of a union cannot be told apart, otherwise
%% returns ok. Indices are zero based.
%%
%% Each branch is paired with its index and a comparison key. The
%% previous code zipped bare names with indices but matched on
%% {{Type, Name}, Index}, so named and primitive branches were never
%% compared at all.
check_uniqueness(Types) ->
    Indexed = lists:zip3(lists:seq(0, length(Types) - 1),
                         Types,
                         [union_branch_key(T) || T <- Types]),
    [ exit({bad_union, Types, {name_clash, {Idx1, T1}, {Idx2, T2}}})
      || {Idx1, T1, K1} <- Indexed,
         {Idx2, T2, K2} <- Indexed,
         Idx1 < Idx2,
         K1 =:= K2 ],
    ok.

%% Comparison key of a union branch: named types are identified by
%% their name, every other type by the type term itself, so only
%% exact duplicates of unnamed types clash.
%% NOTE(review): the Avro spec is stricter (at most one map and one
%% array per union); that is deliberately not enforced here to avoid
%% rejecting schemas that were accepted so far.
union_branch_key(#avro_record{ name = N }) -> {named, N};
union_branch_key(#avro_enum{ name = N })   -> {named, N};
union_branch_key(#avro_fixed{ name = N })  -> {named, N};
union_branch_key(Type)                     -> Type.

%% Parses a "map" definition; only the type of the values is read.
parse_map(Map, Context) ->
    ValuesJsx = proplists:get_value(<<"values">>, Map),
    {Values, Context1} = parse_type(ValuesJsx, Context),
    {#avro_map{ values = Values }, Context1}.

%% Parses a "fixed" definition and registers it in the context under
%% its name.
parse_fixed(Fixed, Context) ->
    NameBin = proplists:get_value(<<"name">>, Fixed),
    Size    = proplists:get_value(<<"size">>, Fixed),
    AName   = binary_to_latin1_atom(NameBin), %% From Avro spec.: [A-Za-z0-9_]
    FixedType = #avro_fixed{ name = AName, size = Size },
    {FixedType, dict:store(AName, FixedType, Context)}.

%% Parses an "array" definition; only the type of the items is read.
parse_array(Array, Context) ->
    ItemsJsx = proplists:get_value(<<"items">>, Array),
    {Items, Context1} = parse_type(ItemsJsx, Context),
    {#avro_array{ items = Items }, Context1}.


%% Normalises a name given as atom, string or binary to a binary.
to_bin(A) when is_atom(A) ->
    atom_to_binary(A, latin1);
to_bin(L) when is_list(L) ->
    list_to_binary(L);
to_bin(B) when is_binary(B) ->
    B.

--------------------------------------------------------------------------------
/src/eavro_codec.erl:
--------------------------------------------------------------------------------
-module(eavro_codec).

%% API
-export([ encode/2,
          decode/2,
          decode/3 ]).

-export([ varint_encode/1,
          varint_decode/2 ]).

-include("eavro.hrl").

%% Encodes Data as the Avro type given in the first argument and
%% returns an iolist (or binary) with the encoded bytes.
%%
%% The binary constructions in the int/long/float/double clauses were
%% lost in extraction and are restored here: zig-zag values are
%% wrapped into 32/64 bit binaries for varint_encode/1, and floating
%% point values are written as little-endian IEEE 754.

%% primitives
encode(int, Int) ->
    Z = zigzag_encode(int, Int),
    varint_encode(<<Z:32>>);
encode(long, Long) ->
    Z = zigzag_encode(long, Long),
    varint_encode(<<Z:64>>);
encode(float, Float) when is_float(Float) ->
    <<Float:32/little-float>>;
encode(double, Double) when is_float(Double) ->
    <<Double:64/little-float>>;
encode(string, Data) when is_binary(Data) ->
    [encode(long, byte_size(Data)), Data];
encode(bytes, Data) when is_binary(Data) ->
    [encode(long, byte_size(Data)), Data];
encode(boolean, true) -> <<1>>;
encode(boolean, false) -> <<0>>;
encode(null, _Any) -> <<>>;

%% complex data types
encode(#avro_record{fields = Fields}, Data) ->
    %% Data is the list of field values in schema order.
    [encode(Type, Value) || {{_Name, Type}, Value} <- lists:zip(Fields, Data)];
encode(#avro_enum{symbols = Symbols}, Data) ->
    ZeroBasedIndex = index_of(Data, Symbols) - 1,
    encode(int, ZeroBasedIndex);
encode(#avro_fixed{ size = Size }, Data) ->
    byte_size(Data) == Size orelse exit(bad_size),
    Data;
encode(#avro_map{ values = ValuesType }, Data) when is_list(Data) ->
    %% Data is a list of {Key, Value} pairs with binary keys.
    encode_blocks(
      ValuesType, Data,
      fun(T, {K,V}) ->
              [ encode(string, K),
                encode(T, V) ]
      end );
encode(#avro_array{ items = Type }, Data) when is_list(Data) ->
    encode_blocks(Type, Data, fun encode/2);
encode(Union, null) when is_list(Union) ->
    encode(Union, {null, null});
encode(Union, {Type, Data}) when is_list(Union) ->
    %% A union value is tagged with the branch type; the branch index
    %% is written first, then the value itself.
    try
        I = index_of(Type, Union) - 1,
        [encode(long, I), encode(Type, Data)]
    catch
        _:not_found -> exit({union_mismatch, Union, Type})
    end.

%% Encodes a map or an array as a single block (item count followed
%% by the items) terminated by a zero count; an empty collection is
%% just the terminator.
encode_blocks(Type, Data, Encoder) when is_list(Data) ->
    Count = length(Data),
    if Count == 0 -> <<0>>;
       true ->
            [encode(long, Count),
             [ Encoder(Type, V) || V <- Data],
             <<0>> ]
    end.


%% One-based position of Item in List; exits with not_found if absent.
index_of(Item, List) -> index_of(Item, List, 1).

index_of(_, [], _) -> exit(not_found);
index_of(Item, [Item|_], Index) -> Index;
index_of(Item, [_|Tl], Index) -> index_of(Item, Tl, Index + 1).

%%
%% Decoding functions
%%

decode(Type, Buff ) ->
    decode(Type, Buff, undefined).

%% Decodes one value of Type from the head of Buff and returns it with
%% the remaining bytes. When Hook is a fun/2, every decoded value
%% except a union wrapper is passed through Hook(Type, Value).
%%
%% The binary patterns in the fixed/string/int/long/float/double
%% clauses were lost in extraction and are restored here.
-spec decode( Type :: avro_type(),
              Buff :: binary() | iolist(),
              Hook :: undefined | decode_hook() ) ->
    { Value :: term(), Buff :: binary()}.

decode(Type, Buff, Hook) when is_list(Buff) ->
    decode(Type, iolist_to_binary(Buff), Hook);
decode(#avro_record{fields = Fields} = Type, Buff, Hook) ->
    %% Field values are decoded in schema order, threading the buffer.
    {FieldsValues, Buff1} =
        lists:mapfoldl(
          fun({_FName, FType}, Buff0) ->
                  decode(FType, Buff0, Hook)
          end, Buff, Fields),
    { decode_hook(Hook, Type, FieldsValues), Buff1};
decode(#avro_enum{symbols=Symbols} = Type, Buff, Hook) ->
    {ZeroBasedIndex, Buff1} = decode(int, Buff, Hook),
    Symbol = lists:nth(ZeroBasedIndex + 1, Symbols),
    { decode_hook(Hook, Type, Symbol ), Buff1};
decode(#avro_map{values=Type} = CType, Buff, Hook) ->
    decode_blocks(CType, Type, [], Buff, Hook, fun map_entry_decoder/3);
decode(#avro_array{items=Type} = CType, Buff, Hook) ->
    decode_blocks(CType, Type, [], Buff, Hook, fun decode/3);
decode(#avro_fixed{size=Size}=Type, Buff, Hook) ->
    <<Val:Size/binary, Buff1/binary>> = Buff,
    {decode_hook(Hook, Type, Val), Buff1};
decode(Type, Buff, Hook) when Type == string orelse Type == bytes ->
    {ByteSize, Buff1} = decode(long, Buff, undefined),
    <<String:ByteSize/binary, Buff2/binary>> = Buff1,
    {decode_hook(Hook, Type, String), Buff2};
decode(int = Type, Buff, Hook) ->
    {<<Z:32>>, Buff1} = varint_decode(int, Buff),
    Int = zigzag_decode(int, Z),
    {decode_hook(Hook, Type, Int), Buff1};
decode(long = Type, Buff, Hook) ->
    {<<Z:64>>, Buff1} = varint_decode(long, Buff),
    Long = zigzag_decode(long, Z),
    {decode_hook(Hook, Type, Long), Buff1};
decode(float = Type, <<Float:32/little-float, Buff/binary>>, Hook) ->
    {decode_hook(Hook, Type, Float), Buff};
decode(double = Type, <<Double:64/little-float, Buff/binary>>, Hook) ->
    {decode_hook(Hook, Type, Double), Buff};
decode(boolean = Type, <<0:7,B:1,Buff/binary>>, Hook) ->
    {decode_hook(Hook, Type, case B of 0 -> false; 1 -> true end), Buff};
decode(null = Type, Buff, Hook) ->
    {decode_hook(Hook, Type, <<>>), Buff};
decode(Union, Buff, Hook) when is_list(Union) ->
    %% Any list type is a union (binary buffers only reach this
    %% clause), so unions whose first branch is a complex type are
    %% decoded too. The branch index selects the type; a non-null
    %% value is tagged with it.
    {Idx, Buff1} = decode(long, Buff),
    Type = lists:nth(Idx + 1, Union),
    {Val, Buff2} = decode(Type, Buff1, Hook),
    { if Type == null -> null;
         true -> {Type, Val}
      end, Buff2}.


%% Decodes one map entry: a string key followed by a value of Type.
map_entry_decoder(Type, Buff, Hook) ->
    {K, Buff1} = decode(string, Buff),
    {V, Buff2} = decode(Type, Buff1, Hook),
    { {K, V}, Buff2}.

%% Decodes the blocks of a map or an array until the zero-count
%% terminator. The result is a list of blocks, each a list of items,
%% in the order the blocks appear in the buffer. Blocks are
%% accumulated newest first, so the accumulator is reversed before it
%% is handed out.
decode_blocks(CollectionType, ItemType, Blocks, Buff, Hook, ItemDecoder)->
    %% Decode block item count
    {Count_, Buff1} = decode(long, Buff),
    %% Analyze count: there is a special behavior for count < 0
    {Count, Buff2} =
        if Count_ < 0 ->
                %% A negative count is followed by the block size in
                %% bytes, which is not needed here.
                {_BlockSize, Buff_} = decode(long, Buff1),
                {-Count_, Buff_};
           true ->
                {Count_, Buff1}
        end,
    %% Decode block items
    {Block, Buff3} = decodeN(Count, ItemType, Buff2, Hook, ItemDecoder),
    case Block of
        [] ->
            {decode_hook(Hook, CollectionType, lists:reverse(Blocks)), Buff3};
        _ ->
            decode_blocks(CollectionType, ItemType, [Block|Blocks], Buff3, Hook, ItemDecoder)
    end.

%% Decodes exactly N consecutive items with Decoder.
decodeN(0, _Type, Buff, _Hook, _Decoder) ->
    {[], Buff};
decodeN(N, Type, Buff, Hook, Decoder) ->
    {H, Buff1} = Decoder(Type, Buff, Hook),
    {Tail, Buff2} = decodeN(N - 1 , Type, Buff1, Hook, Decoder),
    {[ H | Tail ], Buff2}.

%% Applies the optional decode hook to a freshly decoded value.
decode_hook(undefined, _Type, Val) ->
    Val;
decode_hook(Hook, Type, Val) when is_function(Hook,2) ->
    Hook(Type, Val).

%% Internal functions

%% ZigZag encode/decode
%% https://developers.google.com/protocol-buffers/docs/encoding?&csw=1#types
zigzag_encode(int, Int) ->
    (Int bsl 1) bxor (Int bsr 31);
zigzag_encode(long, Int) ->
    (Int bsl 1) bxor (Int bsr 63).

zigzag_decode(int, ZigInt) ->
    (ZigInt bsr 1) bxor -(ZigInt band 1);
zigzag_decode(long, ZigInt) ->
    (ZigInt bsr 1) bxor -(ZigInt band 1).


%% Variable-length format
%% http://lucene.apache.org/core/3_5_0/fileformats.html#VInt
%%
%% varint_encode/1 takes a 32 or 64 bit binary and emits its 7-bit
%% groups least significant first; every byte except the last has the
%% high bit set. The single-group results and the widest clause of
%% each width were lost in extraction and are restored here.

%% 32 bit encode
varint_encode(<<0:32>>) -> <<0>>;
varint_encode(<<0:25, B1:7>>) -> <<B1>>;
varint_encode(<<0:18, B1:7, B2:7>>) ->
    <<1:1, B2:7, B1>>;
varint_encode(<<0:11, B1:7, B2:7, B3:7>>) ->
    <<1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:4, B1:7, B2:7, B3:7, B4:7>>) ->
    <<1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<B1:4, B2:7, B3:7, B4:7, B5:7>>) ->
    <<1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;

%% 64 bit encode
varint_encode(<<0:64>>) -> <<0>>;
varint_encode(<<0:57, B1:7>>) -> <<B1>>;
varint_encode(<<0:50, B1:7, B2:7>>) ->
    <<1:1, B2:7, B1>>;
varint_encode(<<0:43, B1:7, B2:7, B3:7>>) ->
    <<1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:36, B1:7, B2:7, B3:7, B4:7>>) ->
    <<1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:29, B1:7, B2:7, B3:7, B4:7, B5:7>>) ->
    <<1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:22, B1:7, B2:7, B3:7, B4:7, B5:7, B6:7>>) ->
    <<1:1, B6:7, 1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:15, B1:7, B2:7, B3:7, B4:7, B5:7, B6:7, B7:7>>) ->
    <<1:1, B7:7, 1:1, B6:7, 1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:8, B1:7, B2:7, B3:7, B4:7, B5:7, B6:7, B7:7, B8:7>>) ->
    <<1:1, B8:7, 1:1, B7:7, 1:1, B6:7, 1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:1, B1:7, B2:7, B3:7, B4:7, B5:7, B6:7, B7:7, B8:7, B9:7>>) ->
    <<1:1, B9:7, 1:1, B8:7, 1:1, B7:7, 1:1, B6:7, 1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<B1:1, B2:7, B3:7, B4:7, B5:7, B6:7, B7:7, B8:7, B9:7, B10:7>>) ->
    <<1:1, B10:7, 1:1, B9:7, 1:1, B8:7, 1:1, B7:7, 1:1, B6:7, 1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>.

%% varint_decode/2 reads one variable-length integer from the head of
%% the buffer and returns it as a 32 (int) or 64 (long) bit binary
%% together with the remaining bytes. The first two clauses handle
%% the maximum-length encodings, whose last byte contributes only 4
%% (int) or 1 (long) bits; everything shorter is zero-padded on the
%% left to the full width.
varint_decode(int, <<1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, 0:4, B1:4, Bytes/binary>>) ->
    {<<B1:4, B2:7, B3:7, B4:7, B5:7>>, Bytes};
varint_decode(long, <<1:1, B10:7, 1:1, B9:7, 1:1, B8:7, 1:1, B7:7,
                      1:1, B6:7,  1:1, B5:7, 1:1, B4:7, 1:1, B3:7,
                      1:1, B2:7,  0:7, B1:1, Bytes/binary>>) ->
    {<<B1:1, B2:7, B3:7, B4:7, B5:7, B6:7, B7:7, B8:7, B9:7, B10:7>>, Bytes};
varint_decode(Type, Bytes) ->
    {DecBits, RestBytes} = varint_decode(Bytes),
    Base = case Type of
               int  -> 32;
               long -> 64
           end,
    LeadingZeroBits = Base - bit_size(DecBits),
    {<<0:LeadingZeroBits/integer, DecBits/bitstring>>, RestBytes}.

%% varint_decode/1 collects the 7-bit groups into a bitstring, most
%% significant group first: the groups of the following bytes are
%% placed in front of the group of the current byte.
varint_decode(<<0:1, X:7, Bytes/binary>>) ->
    {<<X:7>>, Bytes};
varint_decode(<<1:1,X:7, Bytes/binary>>) ->
    {DecBits, Bytes1} = varint_decode(Bytes),
    {<<DecBits/bitstring, X:7>>, Bytes1}.

--------------------------------------------------------------------------------
/src/eavro_ocf_codec.erl:
--------------------------------------------------------------------------------
-module(eavro_ocf_codec).

-include("eavro.hrl").

-export([decode/2]).

%% See: http://avro.apache.org/docs/1.7.7/spec.html#Object+Container+Files

-spec decode(Bin  :: binary(),
             Hook :: decode_hook() ) ->
    {Schema :: avro_type(),
     Blocks :: [ [ any() ] ]}.
%% Decodes a whole Object Container File held in memory: reads the
%% header with the OCF schema shipped in priv/, takes the embedded
%% schema and codec name from the header metadata, then decodes every
%% data block.
decode(Bin, Hook) ->
    PrivDir = code:priv_dir(eavro),
    OcfSchema = eavro:read_schema(filename:join(PrivDir, "avro_ocf.avsc")),
    {_Header = [_,Meta,_], Bin1} = eavro_codec:decode(OcfSchema,Bin),
    %% Map metadata is decoded as a list of blocks of {Key, Value}
    %% pairs; flatten it into one proplist.
    MetaProps  = lists:flatten(Meta),
    SchemaJson = proplists:get_value(<<"avro.schema">>, MetaProps),
    Codec      = proplists:get_value(<<"avro.codec">>, MetaProps),
    Schema = eavro:parse_schema(SchemaJson),
    {Schema, decode_blocks(Schema, Bin1, Codec, Hook)}.

%% A data block is: object count, byte size of the serialized objects,
%% the objects themselves, then the 16-byte sync marker.
%% NOTE(review): the block pattern below was lost in extraction and is
%% reconstructed from the container layout - confirm against upstream.
decode_blocks(_Schema, <<>>, _Codec, _Hook) -> [];
decode_blocks(Schema, Buff, Codec, Hook) ->
    {ObjCount, Buff1} = eavro_codec:decode(long, Buff),
    {BlockSize, Buff2} = eavro_codec:decode(long, Buff1),
    <<Block:BlockSize/binary, _Sync:16/binary, Buff3/binary>> = Buff2,
    Objs = decodeN(Schema, ObjCount, inflate(Block, Codec), Hook),
    [Objs | decode_blocks(Schema, Buff3, Codec, Hook)].

%% Decodes ObjCount objects; the block must be consumed exactly.
decodeN(_Schema, 0, <<>>, _Hook) ->
    [];
decodeN(Schema, ObjCount, Buff, Hook) ->
    {Obj, Buff1} = eavro_codec:decode(Schema, Buff, Hook),
    [Obj | decodeN(Schema, ObjCount-1, Buff1, Hook)].

%% Undoes the block compression named in the file header. "deflate"
%% blocks are raw deflate streams (negative window bits, as in
%% eavro_ocf_zcodec); any other codec value leaves the block as is.
inflate(Block, <<"deflate">>) ->
    Z = zlib:open(),
    try
        ok = zlib:inflateInit(Z, -15),
        iolist_to_binary(zlib:inflate(Z, Block))
    after
        zlib:close(Z)
    end;
inflate(Block, _Codec) ->
    Block.

--------------------------------------------------------------------------------
/src/eavro_ocf_zcodec.erl:
--------------------------------------------------------------------------------
-module(eavro_ocf_zcodec).

-include("eavro.hrl").

-export( [ read_ocf_with/2,
           read_ocf/1,
           read_ocf_with/3,
           read_ocf/2,
           write_ocf/4,
           write_ocf/3,
           write_ocf_file/4,
           write_ocf_file/3,
           read_ocf_blocks/1] ).

-export_type([ocf_visitor/1]).

-record(ocf_header,
        { schema,
          compression_codec,
          sync }).

-type ocf_visitor(T) :: fun ( (Schema :: avro_type(),
                               ZInstancesRead :: zlists:zlist(term()) ) -> T ).

%% Forces the tail of a lazy list: a fun/0 tail is called, a plain
%% list tail is returned unchanged.
-define(EXPAND(Tail), if is_function(Tail, 0) -> Tail();
                         is_list(Tail) -> Tail end).

-define(ECHO(T), io:format("~p| ~p~n", [?LINE, T])).
%%=========================================================
%% API
%%=========================================================

write_ocf_file(Filename, Schema, ZInstances) ->
    write_ocf_file(Filename, Schema, ZInstances, []).

%% Opens Filename for writing (extra file:open/2 modes may be given
%% with the file_opts option, default [raw]), writes the container
%% and always closes the file.
write_ocf_file(Filename, Schema, ZInstances, Opts) ->
    FOpts = proplists:get_value(file_opts, Opts, [raw]),
    {ok, Io} = file:open(Filename, [write | FOpts]),
    try write_ocf(Io, Schema, ZInstances, Opts)
    after
        file:close(Io)
    end.

write_ocf(Io, Schema, ZInstances) ->
    write_ocf(Io, Schema, ZInstances, []).

%% Writes an Object Container File to an open io device: the header
%% (magic, metadata, sync marker), then the instances of the lazy list
%% ZInstances grouped into blocks. A block is flushed once its
%% accumulated size has reached the block_size option (default 10 MiB)
%% and another instance arrives; the last block is always flushed.
write_ocf(Io, Schema, ZInstances, Opts) ->
    Codec0    = proplists:get_value(codec, Opts, plain),
    BlockSize = proplists:get_value(block_size, Opts, 1024*1024*10),
    Codec     = to_bin(Codec0),
    %% crypto:rand_bytes/1 no longer exists in current OTP releases;
    %% strong_rand_bytes/1 is the supported way to get the marker.
    Sync      = crypto:strong_rand_bytes(16),
    PrivDir   = code:priv_dir(eavro),
    OcfSchema = eavro:read_schema(filename:join(PrivDir, "avro_ocf.avsc")),
    Meta = [{<<"avro.schema">>, eavro:encode_schema(Schema)}] ++
        case Codec0 of
            plain -> [];
            _     -> [{<<"avro.codec">>, Codec}]
        end,
    Z = open_codec(Codec),
    %% A block under construction is {InstanceCount, ByteSize, Chunks}
    %% with the chunks kept newest first.
    AddInstanceToBlock =
        fun(Instance, {BlockInstCount, CurrBlockSize, BlockBytes}) ->
                EncBytes = eavro_codec:encode(Schema, Instance),
                CompressedBytes = compress(Z, EncBytes),
                {BlockInstCount + 1,
                 CurrBlockSize + iolist_size(CompressedBytes),
                 [CompressedBytes | BlockBytes]}
        end,
    FlushBlock =
        fun({BlockInstCount, CurrBlockSize, BlockBytes}) ->
                %% Whatever the codec emits on finish belongs to the
                %% block and counts towards its size.
                Tail = end_compress(Z),
                ok = file:write(Io, eavro_codec:encode(long, BlockInstCount)),
                ok = file:write(Io, eavro_codec:encode(
                                      long, CurrBlockSize + iolist_size(Tail))),
                ok = file:write(Io, lists:reverse([Tail|BlockBytes])),
                ok = file:write(Io, Sync)
        end,
    try ok = file:write(
               Io,
               eavro_codec:encode(OcfSchema, [<<"Obj",1>>, Meta, Sync]) ),
         init_codec(Z),
         LastBlock =
             zlists:foldl(
               fun
                   (Instance, {_BlockInstCount, CurrBlockSize, _BlockBytes} = Block)
                     when CurrBlockSize >= BlockSize ->
                       FlushBlock(Block),
                       init_codec(Z),
                       AddInstanceToBlock(Instance, {0,0,[]});
                   (Instance, Block) ->
                       AddInstanceToBlock(Instance, Block)
               end, {0,0,[]}, ZInstances),
         FlushBlock(LastBlock),
         ok
    after
        close_codec(Z)
    end.

%% Starts a fresh raw deflate stream (window bits -15) for a block;
%% nothing to do for the plain codec.
init_codec(<<"plain">>) -> ok;
init_codec(plain) -> ok;
init_codec({<<"deflate">>, Z}) ->
    zlib:deflateInit(Z, default, deflated, -15, 9, default).

%% Finishes the stream of the current block and returns the bytes the
%% codec still had to emit.
end_compress(<<"plain">>) -> <<>>;
end_compress(plain) -> <<>>;
end_compress({<<"deflate">>, Z}) ->
    B = zlib:deflate(Z, << >>, finish),
    zlib:deflateEnd(Z),
    B.


%% Compresses the bytes of one encoded instance; the deflate stream is
%% sync-flushed so the output size is known per instance.
compress(<<"plain">>, B) -> B;
compress(plain, B) -> B;
compress({<<"deflate">>, Z}, B) ->
    zlib:deflate(Z, B, sync).




to_bin(B) when is_binary(B) ->
    B;
to_bin(A) when is_atom(A) ->
    atom_to_binary(A, latin1);
to_bin(L) when is_list(L) ->
    list_to_binary(L).

%%
%% Opens an Avro OCF file, reads the schema and the instances and
%% passes them to a callback function. This makes reading safer,
%% because the file and the zlib stream (when the 'deflate' codec is
%% used) are closed properly.
%%
%% The function also makes it possible to read huge OCF files in a
%% lazy, memory efficient way. Instances are passed to the callback
%% as a lazy list, i.e. an improper list of the form
%% [H1,...,Hn | Fun], where Fun is a fun( () -> [H1',...,Hn' | Fun1])
%% and so on. Such a list may be consumed directly or by means of the
%% 'zlists' library (see erlang-zlists on github).
%%
-spec read_ocf_with(
        File    :: file:filename(),
        Visitor :: ocf_visitor(Result) ) -> Result.
read_ocf_with(File, Visitor) ->
    read_ocf_with(File, Visitor, undefined).

%% Same as read_ocf_with/2 with a decode hook applied to the decoded
%% values. The codec finalizer runs after the visitor returns or
%% fails, and the file is closed last.
-spec read_ocf_with(
        File    :: file:filename(),
        Visitor :: ocf_visitor(Result),
        Hook    :: decode_hook() ) -> Result.
read_ocf_with(File, Visitor, Hook) ->
    {ok, Io} = file:open(File, [read, raw, binary]),
    try
        {Schema, ZInstances, CodecFinalizer} = read_ocf(Io, Hook),
        try Visitor(Schema, ZInstances)
        after
            CodecFinalizer()
        end
    after
        file:close(Io)
    end.

%% Reads the header of an OCF file and returns it with a lazy list of
%% raw, still encoded blocks {ObjectCount, ByteSize, BlockBytes, Sync}.
%% The file is left open because the lazy list reads from it on
%% demand; nothing in this function closes it.
%% The pattern splitting a block from its 16-byte sync marker was lost
%% in extraction and is restored here.
read_ocf_blocks(Filename) ->
    {ok, Io} = file:open(Filename, [read, raw, binary]),
    ZBytes = zlists_file:read(Io, 1024*64),
    { Header = #ocf_header{}, ZBytes1 } = read_ocf_header(ZBytes),
    %%
    ReadBlock =
        fun
            ([], _Cont) -> [];
            (ZBuff, Cont) ->
                {Count, ZBuff1} = eavro_zcodec:decode(long, ZBuff),
                {BSize, ZBuff2} = eavro_zcodec:decode(long, ZBuff1),
                [<<BlockBytes:BSize/binary, Sync1:16/binary>> | ZBuff3] =
                    zlists_file:expand_binary(ZBuff2, BSize + 16),
                [ {Count, BSize, BlockBytes, Sync1} |
                  fun() -> Cont(?EXPAND(ZBuff3), Cont) end ]
        end,
    { Header, ReadBlock(ZBytes1, ReadBlock) }.

%%
%% Reads and decodes Avro OCF file and returns a schema and instances as a Z-List.
%%
-spec read_ocf(Io :: file:io_device() ) ->
    { avro_type(), zlists:zlist( term() ), fun( () -> ok ) }.
read_ocf(Io) ->
    read_ocf(Io, undefined).

%% The third element of the result is a fun that releases the codec
%% session; call it once the instances have been consumed.
-spec read_ocf(Io   :: file:io_device(),
               Hook :: decode_hook() ) ->
    { avro_type(), zlists:zlist( term() ), fun( () -> ok ) }.
read_ocf(Io, Hook) ->
    { Schema, ZBytes, CodecFinalizer} = read_ocf_(Io),
    ZInstances = eavro_zcodec:decode_seq(
                   Schema, Hook, fun eavro_zcodec:decode/3, ZBytes),
    { Schema, ZInstances, CodecFinalizer}.
%%=========================================================
%% Private area
%%=========================================================

%% Decodes the container header from a lazy list of bytes and returns
%% it as an #ocf_header{} together with the bytes that follow it.
read_ocf_header(ZBytes) ->
    %% Read header
    PrivDir = code:priv_dir(eavro),
    OcfSchema = eavro:read_schema(filename:join(PrivDir, "avro_ocf.avsc")),
    {_Header = [_,Meta,Sync], ZBytes1} = eavro_zcodec:decode(OcfSchema, ZBytes),
    MetaProps  = lists:flatten(Meta),
    SchemaJson = proplists:get_value(<<"avro.schema">>, MetaProps),
    Codec      = proplists:get_value(<<"avro.codec">>, MetaProps),
    Schema = eavro:parse_schema(SchemaJson),
    { #ocf_header{
         schema = Schema,
         compression_codec = Codec,
         sync = Sync}, ZBytes1 }.

%% Returns {Schema, LazyDecompressedBytes, CodecFinalizer}.
%%
%% If setting up the block reader fails, the codec session is closed
%% BEFORE exiting. The call used to sit after exit/1, where it could
%% never run, so a zlib stream was left open on every failure.
read_ocf_(Io) ->
    ZBytes = zlists_file:read(Io, 1024*64),
    { #ocf_header{
         schema = Schema,
         compression_codec = Codec,
         sync = _Sync}, ZBytes1 } = read_ocf_header(ZBytes),
    %%
    CodecSession = open_codec(Codec),
    try
        {Schema,
         read_ocf_blocks_continuosly(CodecSession, ZBytes1),
         fun()-> close_codec(CodecSession) end}
    catch
        _:Reason ->
            close_codec(CodecSession),
            exit({failed_read_ocf_blocks, Reason})
    end.

%% Creates a codec session: a zlib stream for "deflate", the atom
%% 'plain' for anything else except "snappy", which is not supported.
open_codec(<<"deflate">> = Codec) ->
    {Codec, zlib:open()};
open_codec(<<"snappy">>) ->
    exit(unsupported);
open_codec(_) ->
    plain.

close_codec({<<"deflate">>, Z}) ->
    zlib:close(Z), ok;
close_codec(_) ->
    ok.


%% Produces the lazy list of decompressed block bytes, starting with
%% the block whose header is at the head of ZBytes; the object count
%% is read and ignored.
read_ocf_blocks_continuosly(_Codec, []) -> [];
read_ocf_blocks_continuosly(_Codec, [<<>>]) -> [];
read_ocf_blocks_continuosly(Codec, ZBytes) ->
    {_ObjCount, ZBytes1} = eavro_zcodec:decode(long, ZBytes),
    {BlockSize, ZBytes2} = eavro_zcodec:decode(long, ZBytes1),
    uncompress_ocf_block(Codec, ZBytes2, BlockSize).

%% Streams the next BlockSize bytes of ZBuff through the codec and
%% then continues with the following block.
uncompress_ocf_block({<<"deflate">>, Z}, ZBuff, BlockSize) ->
    ok = zlib:inflateInit(Z,-15),
    inflate_ocf_block(Z, ZBuff, BlockSize);
uncompress_ocf_block(<<"snappy">>, _ZBuff, _BlockSize) ->
    exit(unsupported);
uncompress_ocf_block(Plain, ZBuff, BlockSize)
  when Plain == plain;Plain== <<"plain">> ->
    plain_ocf_block(ZBuff, BlockSize).

%% Emits the chunks of an uncompressed block lazily. Size is the
%% number of block bytes still to emit; a chunk that reaches beyond
%% the block is split and its right part is handed to the next step.
%% Once the block is exhausted the 16-byte sync marker is skipped and
%% reading continues with the next block.
%% The chunk-splitting pattern was lost in extraction and is restored
%% here.
plain_ocf_block(ZBuff, 0) when is_list(ZBuff) ->
    [<<_Sync:16/binary>> | ZBuff1] = zlists_file:expand_binary(ZBuff, 16),
    read_ocf_blocks_continuosly(plain, ?EXPAND(ZBuff1));
plain_ocf_block([Chunk | Tail], Size) ->
    ChunkSize = byte_size(Chunk),
    if ChunkSize =< Size ->
            [Chunk | fun() -> plain_ocf_block(?EXPAND(Tail), Size - ChunkSize) end];
       ChunkSize > Size ->
            <<LChunk:Size/binary, RChunk/binary>> = Chunk,
            [LChunk | fun() -> plain_ocf_block([ RChunk | Tail ], 0) end]
    end.

%% Same walk as plain_ocf_block/2, but every emitted chunk is inflated
%% first and the inflate stream is ended when the block is exhausted.
inflate_ocf_block(Z, ZBuff, 0) ->
    ok = zlib:inflateEnd(Z),
    [<<_Sync:16/binary>> | ZBuff1] = zlists_file:expand_binary(ZBuff, 16),
    read_ocf_blocks_continuosly({<<"deflate">>, Z}, ?EXPAND(ZBuff1));
inflate_ocf_block(Z,[Chunk | Tail], Size) ->
    ChunkSize = byte_size(Chunk),
    if ChunkSize =< Size ->
            [iolist_to_binary(zlib:inflate(Z,Chunk)) |
             fun() ->
                     inflate_ocf_block(Z,?EXPAND(Tail), Size - ChunkSize)
             end];
       ChunkSize > Size ->
            <<LChunk:Size/binary, RChunk/binary>> = Chunk,
            [iolist_to_binary(zlib:inflate(Z,LChunk)) |
             fun() -> inflate_ocf_block(Z,[ RChunk | Tail ], 0) end]
    end.

--------------------------------------------------------------------------------
/src/eavro_rpc_fsm.erl:
--------------------------------------------------------------------------------
-module(eavro_rpc_fsm).

-behaviour(gen_fsm).

-include("eavro.hrl").

%% API
-export([start_link/3]).

%% gen_fsm callbacks
-export([init/1,
         handle_event/3,
         handle_sync_event/4,
         handle_info/3,
         terminate/3,
         code_change/4]).

%% gen_fsm states
-export([connect/2,
         connect/3,
         handshake_start/2,
         handshake_start/3,
         handshake_finish/2,
         handshake_finish/3,
         main/2,
         main/3]).

-export([call/3, call/4]).

%% Connection state: peer address, parsed protocol, socket, frame
%% decoder continuation, serial of the next request and the callers
%% still waiting for a reply.
-record(state, { host,
                 port,
                 proto :: #avro_proto{},
                 socket,
                 cont,
                 serial = 0,
                 reply_list = [] }).
%% A request to invoke protocol message 'name' with 'args'.
-record(call, { name, args}).

%%%===================================================================
%%% API
%%%===================================================================

-type fsm_ref() :: atom() |
                   {atom(), node()} |
                   {global, any()} |
                   {via, atom(),atom()} |
                   pid().

%% Starts the client FSM. The third argument is either a parsed
%% protocol or the name of a protocol file given as a string (a list
%% whose head is an integer), which is parsed first.
start_link(Host, Port, ProtoFilename) when is_integer(hd(ProtoFilename)) ->
    start_link(Host, Port,
               eavro_rpc_proto:parse_protocol_file(ProtoFilename));
start_link(Host, Port, Proto) ->
    gen_fsm:start_link(?MODULE, [Host, Port, Proto], []).

%% Synchronous call of a protocol message without a time limit.
-spec call(FsmRef :: fsm_ref(), MethodName :: atom(), Args :: [any()]) ->
                  {ok, Result :: any()} | {error, any()}.
call(FsmRef, Name, Args) when is_list(Args) ->
    call(FsmRef, Name, Args, infinity).


%% Synchronous call of a protocol message with an explicit timeout.
-spec call(FsmRef :: fsm_ref(), MethodName :: atom(), Args :: [any()],
           Timeout :: non_neg_integer() | infinity) ->
                  {ok, Result :: any()} | {error, any()}.
call(FsmRef, Name, Args, Timeout) when is_list(Args) ->
    Request = #call{name = Name, args = Args},
    gen_fsm:sync_send_event(FsmRef, Request, Timeout).

%%%===================================================================
%%% gen_fsm callbacks
%%%===================================================================

%% Starts in the 'connect' state and immediately schedules the first
%% connection attempt. The attempt is a closure so that the retries
%% in connect/2 can simply run it again.
init([Host, Port, Proto]) ->
    TcpOpts = [inet, binary,
               {packet, 0},
               {active, true},
               {nodelay, true},
               {reuseaddr, true}],
    Attempt =
        fun(State) ->
                {ok, Sock} = gen_tcp:connect(Host, Port, TcpOpts, 3000),
                {cont, Cont} = eavro_rpc_proto:decode_frame_sequences(<<>>),
                Connected = State#state{ socket = Sock,
                                         proto  = Proto,
                                         serial = 0,
                                         cont   = Cont },
                {next_state, handshake_start, Connected}
        end,
    gen_fsm:send_event_after(0, {connect, 0, Attempt}),
    {ok, connect, #state{ host = Host, port = Port}}.

%%
%% STATE: CONNECT
%%

%% Runs one connection attempt. On failure every third attempt is
%% logged and the next one is scheduled five seconds later.
connect({connect, N, Fun}, #state{ host = Host, port = Port} = State) ->
    try
        Fun(State)
    catch
        _:Reason ->
            case N rem 3 of
                0 ->
                    error_logger:error_msg(
                      "Failed to connect to Flume/AvroAPI endpoint ~p due to: ~p~n",
                      [{Host, Port}, Reason]);
                _ ->
                    ok
            end,
            gen_fsm:send_event_after(5000, {connect, N + 1, Fun}),
            {next_state, connect, State}
    end.

%% Calls made before the connection is up are refused at once.
connect(#call{}, _From, State) ->
    {reply, {error, connecting}, connect, State}.

%%
%% STATE: HANDSHAKE-START
%%

%% The first call on a fresh connection goes out together with the
%% handshake request, as two frames of one frame sequence.
handshake_start(#call{ name = Name, args = Args},
                #state{socket = Sock, serial = Ser,
                       proto = #avro_proto{json = Json} = Proto} = State)
  when is_list(Args) ->
    Handshake = [erlang:md5(Json),
                 {string, Json},
                 <<0:128>>,
                 {#avro_map{ values = bytes}, []}],
    HReq = eavro:encode(eavro_rpc_proto:schema_HandshakeRequest(), Handshake),
    EncCall = eavro_rpc_proto:encode_call(Proto, Name, Args),
    Packet = make_frame_sequence(Ser, [HReq, EncCall]),
    ok = gen_tcp:send(Sock, Packet),
    {next_state, handshake_finish, State#state{serial = Ser + 1}}.

%% Synchronous variant: remember the caller, then proceed exactly as
%% for the asynchronous event; the reply is sent later.
handshake_start(Event = #call{}, From, State) ->
    handshake_start(Event, track_caller(From, Event, State) ).


%%
%% STATE: HANDSHAKE-FINISH
%%

%% Calls issued while the handshake response is still pending are
%% sent right away; the state does not change.
handshake_finish(#call{ name = Name, args = Args}, State)
  when is_list(Args) ->
    send_call(Name, Args, handshake_finish, State).

handshake_finish(Event = #call{}, From, State) ->
    handshake_finish(Event, track_caller(From, Event, State) ).


%%
%% STATE: MAIN
%%

%% Regular operation: encode the call, send it, stay in 'main'.
main(#call{ name = Name, args = Args}, State) when is_list(Args) ->
    send_call(Name, Args, main, State).

main(Event = #call{}, From, State) ->
    main(Event, track_caller(From, Event, State)).

%% Encodes one call, sends it as a single-frame sequence tagged with
%% the current serial and advances the serial.
send_call(Name, Args, NextState,
          #state{socket = Sock, serial = Ser, proto = Proto} = State) ->
    EncCall = eavro_rpc_proto:encode_call(Proto, Name, Args),
    ok = gen_tcp:send(Sock, make_frame_sequence(Ser, [EncCall])),
    {next_state, NextState, State#state{serial = Ser + 1}}.

%% Registers a synchronous caller under the serial its request is
%% about to be sent with.
track_caller(Caller,
             #call{ name = Name},
             #state{serial = Ser,
                    reply_list = RL} = State) ->
    Pending = {Ser, Caller, Name},
    State#state{reply_list = [Pending | RL]}.


%%
%% All-state events are ignored.
%%
handle_event(_Event, StateName, State) ->
    {next_state, StateName, State}.

%% All-state synchronous events are answered with 'ok'.
handle_sync_event(_Event, _From, StateName, State) ->
    {reply, ok, StateName, State}.

%% Incoming TCP data is fed to the frame decoder continuation.
%%
%% While the handshake is pending, the first complete sequence must
%% carry serial 0 and start with the handshake response: the response
%% is stripped from its head frame, the remainder is delivered like
%% any other reply and the FSM moves on to 'main'.
handle_info({tcp, Sock, Data},
            handshake_finish = CurrState,
            #state{socket = Sock, cont = Cont} = State) ->
    case Cont(Data) of
        {cont, Cont1} ->
            {next_state, CurrState, State#state{ cont = Cont1 } };
        { [{0 = Ser0, [ HeadFrame | Frames ]} | Sequences], {cont, Cont1} } ->
            { [Match|_], HeadFrameTail } =
                eavro_rpc_proto:decode_handshake_response(HeadFrame),
            Match == 'NONE' andalso exit(server_not_support_protocol),
            Replies = [{Ser0, [HeadFrameTail|Frames]} | Sequences],
            Delivered = reply_to_clients(Replies, State),
            {next_state, main, Delivered#state{ cont = Cont1 }}
    end;
handle_info({tcp, Sock, Data}, main,
            #state{socket = Sock, cont = Cont} = State) ->
    case Cont(Data) of
        {cont, Cont1} ->
            {next_state, main, State#state{ cont = Cont1 } };
        { Sequences, {cont, Cont1} } ->
            Delivered = reply_to_clients(Sequences, State),
            {next_state, main, Delivered#state{ cont = Cont1 }}
    end;
handle_info(_Info, StateName, State) ->
    {next_state, StateName, State}.



%% Delivers every decoded frame sequence to the caller registered
%% under its serial, in order.
reply_to_clients(Sequences, State) ->
    lists:foldl(fun reply_to_client/2, State, Sequences).

%% Delivers one sequence. A sequence nobody waits for is dropped;
%% otherwise the response is decoded with the return type of the
%% called message, sent to the caller, and the caller is removed from
%% the pending list.
reply_to_client({Ser, Frames},
                #state{ proto = Proto, reply_list = Pending } = State) ->
    case lists:keytake(Ser, 1, Pending) of
        false ->
            State#state{ reply_list = Pending };
        {value, {Ser, Caller, MessageName}, StillPending} ->
            #avro_message{ return = RetType} =
                eavro_rpc_proto:get_message(Proto, MessageName),
            Resp = eavro_rpc_proto:decode_call_response(
                     RetType, iolist_to_binary(Frames)),
            gen_fsm:reply(Caller, Resp),
            State#state{ reply_list = StillPending }
    end.

terminate(_Reason, _StateName, _State) ->
    ok.

code_change(_OldVsn, StateName, State, _Extra) ->
    {ok, StateName, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% Prefixes Data with its byte size as a 32-bit big-endian integer.
-spec make_frame( Data :: iolist()) -> iolist().
make_frame(Data) ->
    Size = iolist_size(Data),
    [<<Size:32>>, Data].

%% Builds one frame sequence: a 32-bit serial, a 32-bit frame count and
%% then every encoded call wrapped by make_frame/1. This is the layout
%% eavro_rpc_proto:decode_frame_sequences/1 reads back.
make_frame_sequence(Serial, EncodedCalls) when is_integer(Serial),
                                               is_list(EncodedCalls) ->
    SequenceLength = length(EncodedCalls),
    [<<Serial:32, SequenceLength:32>> |
     [ make_frame(Call) || Call <- EncodedCalls ] ].

--------------------------------------------------------------------------------
/src/eavro_rpc_handler.erl:
--------------------------------------------------------------------------------
-module(eavro_rpc_handler).

-include("eavro.hrl").

%%
%% Returns the Avro protocol served by this Avro RPC server.
%% Use e.g.:
%%    eavro_rpc_proto:parse_protocol_file("my_proto.avpr")
%%
-callback get_protocol() ->
    #avro_proto{}.

%%
%% Called when a client has established a connection with the server.
%% Implementing this callback gives the handler a chance to perform
%% its initialization.
%%
-callback init(Args :: any()) ->
    {ok, State :: any()}.

%%
%% Implement this callback to handle client calls.
%%
-callback handle_call(
	    Call  :: { MessageSchema :: #avro_message{},
		       CallArgs      :: [ any() ] },
	    State :: any() ) ->
    {ok, any() } |
    {error, avro_type() }.


--------------------------------------------------------------------------------
/src/eavro_rpc_proto.erl:
--------------------------------------------------------------------------------
-module(eavro_rpc_proto).

-compile(export_all).

-include("eavro.hrl").

-export([parse_protocol_file/1,
	 parse_protocol/1,
	 encode_call/3,
	 decode_call/2,
	 encode_response/2,
	 encode_handshake_response/1,
	 decode_frame_sequences/1,
	 decode_handshake_response/1,
	 decode_handshake_request/1,
	 decode_call_response/2]).

%% Parses Avro Protocol Definition from JSON file (*.avpr).
%% Crashes with a badmatch when the file cannot be read.
-spec parse_protocol_file(Filename :: file:filename()) -> #avro_proto{}.
parse_protocol_file(Filename) ->
    {ok, Json} = file:read_file(Filename),
    parse_protocol(Json).

%%
%% Parses Avro Protocol Definition from JSON formatted text.
%%
%% The properties "messages", "types", "protocol" and "namespace" are all
%% mandatory; a missing one terminates the caller with
%% exit({expected, Name}).
%%
-spec parse_protocol(AvprJson :: binary()) -> #avro_proto{}.
parse_protocol(Json) ->
    Jsx = [{_,_}|_] = jsx:decode(Json),
    Prop = fun(Name, Props) ->
		   case proplists:get_value(Name, Props) of
		       undefined ->
			   exit({expected, Name});
		       Val -> Val
		   end
	   end,
    Messages = Prop(<<"messages">>, Jsx),
    {Types, Context} = eavro:parse_types(Prop(<<"types">>, Jsx), dict:new()),
    Name = Prop(<<"protocol">>, Jsx),
    Ns   = Prop(<<"namespace">>, Jsx),
    ParseMessage =
	fun({MName, M}) ->
		Req = Prop(<<"request">>, M),
		Res = Prop(<<"response">>, M),
		%% A type is either something eavro:parse_type/2 understands
		%% in the current context or, failing that, the bare name of
		%% exactly one record/enum/fixed declared under "types".
		ResolveType =
		    fun(ArgNameOrDef) ->
			    try {TypeParsed, _Context1} =
				     eavro:parse_type(ArgNameOrDef, Context),
				 TypeParsed
			    catch
				_:_ when is_binary(ArgNameOrDef) ->
				    ArgName = binary_to_atom(ArgNameOrDef,latin1),
				    hd([_] = [ T || T <- Types,
						    case T of
							#avro_record{name = ArgName} -> true;
							#avro_enum{name = ArgName} -> true;
							#avro_fixed{name = ArgName} -> true;
							_ -> false
						    end])
			    end
		    end,
		MArgs =
		    [ ResolveType(Prop(<<"type">>, Arg)) || Arg <- Req ],
		#avro_message{
		   name   = MName,
		   args   = MArgs,
		   return = ResolveType(Res) }
	end,
    #avro_proto{
       ns       = Ns,
       name     = Name,
       types    = Types,
       messages = lists:map(ParseMessage, Messages),
       json     = Json }.

%%
%% Encoding/decoding calls.
%%

%% Encodes a call as: empty metadata map, message name, then every
%% argument encoded with the type declared at the same position.
%% Exits with bad_message_arity when the argument count differs from the
%% declaration.
encode_call(Proto,
	    MessageName, Args) when is_atom(MessageName) ->
    encode_call(Proto, atom_to_binary(MessageName,latin1), Args);
encode_call(#avro_proto{} = Proto,
	    MessageName, Args) when is_binary(MessageName), is_list(Args) ->
    #avro_message{
       name = MessageNameBin,
       args = Types } = get_message(Proto, MessageName),
    length(Args) == length(Types) orelse exit(bad_message_arity),
    [eavro:encode(schema_Meta(), []),
     eavro:encode(string, MessageNameBin),
     [ eavro:encode(Type, Arg) || {Type, Arg} <- lists:zip(Types, Args) ]
    ].

%% Inverse of encode_call/3. Returns {{MessageSchema, Args}, Rest}.
decode_call(#avro_proto{} = Proto, Buff) ->
    {_Meta, Buff1} = eavro:decode(schema_Meta(), Buff),
    {MessageNameBin, Buff2} = eavro:decode(string, Buff1),
    Msg = #avro_message{
	     args = Types } = get_message(Proto, MessageNameBin),
    {ArgsR, Buff3} =
	lists:foldl(
	  fun(T, {Vals, B}) ->
		  {V, B1} = eavro:decode(T,B),
		  {[V|Vals], B1}
	  end,
	  {[], Buff2},
	  Types),
    { {Msg, lists:reverse(ArgsR)}, Buff3}.

%% Looks the message up by name and encodes Result for it.
encode_response(#avro_proto{} = Proto, MessageName, Result) ->
    M = get_message(Proto, MessageName),
    encode_response(M, Result).

%% Encodes {ok, Value} or {error, Value} as: empty metadata map, a one
%% byte error flag (0 = ok, 1 = error) and the value.
%% NOTE(review): the error value is always tagged with RetType inside the
%% [string, RetType] union, while eavro_rpc_srv:do_call/2 passes plain
%% binaries as error text -- confirm eavro:encode/2 accepts that when
%% RetType is not string-like.
encode_response(#avro_message{return = RetType }, {Status, Ret}) ->
    [eavro:encode(schema_Meta(), []),
     case Status of
	 ok    -> [<<0>>, eavro:encode(RetType, Ret)];
	 error -> [<<1>>, eavro:encode(
			    [string, RetType],
			    {RetType, Ret} )]
     end].

%% Handshake answer that always reports a 'CLIENT' match and ships the
%% server protocol JSON together with its MD5 hash.
encode_handshake_response(#avro_proto{json = Json})->
    eavro:encode(
      schema_HandshakeResponse(),
      ['CLIENT',
       {string, Json},
       {schema_MD5(), erlang:md5(Json)},
       {schema_Meta(), []}]).

%% Finds a message definition by name (atom or binary). Exits with
%% {unsupported_proto_message, MessageName} when the protocol has no such
%% message.
get_message(#avro_proto{ messages = Messages},
	    MessageName) ->
    Key = if is_atom(MessageName) -> atom_to_binary(MessageName, latin1);
	     true -> MessageName
	  end,
    case lists:keyfind(Key, #avro_message.name, Messages) of
	#avro_message{} = Msg -> Msg;
	false -> exit({unsupported_proto_message, MessageName})
    end.

%% Decodes a complete buffer holding a handshake response followed by
%% call responses for MessageName. Returns
%% {HandshakeResponse, [{Serial, CallResponse}]}.
%%
%% decode_frame_sequences/1 returns {Sequences, Continuation} (or a bare
%% continuation when the input is incomplete), never a plain list, so the
%% result is destructured accordingly. The handshake response is read
%% from the first frame of the first sequence and the remainder of that
%% frame is kept as the start of the first call response -- the same
%% treatment eavro_rpc_fsm applies. Incomplete input fails with badmatch.
decode_response(Proto, MessageName, Buff) ->
    #avro_message{return = RetType } = get_message(Proto, MessageName),
    {[{Ser, [HeadFrame | Frames]} | Sequences], _Cont} =
	decode_frame_sequences(Buff),
    {HeaderResponse, HeadFrameTail} = decode_handshake_response(HeadFrame),
    {HeaderResponse,
     [ {S, decode_call_response(RetType, iolist_to_binary(Fs))}
       || {S, Fs} <- [ {Ser, [HeadFrameTail | Frames]} | Sequences ] ] }.

%% Decodes one call response: metadata map, one byte error flag, then
%% either the declared return type (flag 0) or the [string, RetType]
%% error union (flag 1). The buffer must be consumed completely.
decode_call_response(RetType, Buff) ->
    {_Meta, <<ErrFlag, Buff1/binary>>} = eavro:decode(schema_Meta(), Buff),
    case ErrFlag of
	0 ->
	    {Rsp, <<>>} = eavro:decode(RetType,Buff1),
	    {ok, Rsp};
	1 ->
	    {Err, <<>>} = eavro:decode([string, RetType],Buff1),
	    {error, Err}
    end.

%% Sample wire data: serial 0, two frames of 3 and 4 bytes.
test_data() ->
    [<<0:32>>,<<2:32>>,<<3:32,1,2,3>>,<<4:32,1,2,3,4>>].

%% Incremental decoder for a stream of frame sequences. Each sequence
%% starts with a 32-bit serial and a 32-bit frame count.
%% Returns {cont, Fun} when more bytes are needed, otherwise
%% {[{Serial, Frames}], {cont, Fun}} where Fun accepts the next chunk.
decode_frame_sequences(<<Serial:32, N:32, Tail/binary>>) ->
    continue_decode_frame_sequences(Serial,
				    decode_frame_sequence(N, Tail, []));
decode_frame_sequences(Buff) ->
    %% Header still incomplete: keep what we have and wait for more.
    {cont,
     fun(Buff1) ->
	     decode_frame_sequences(<<Buff/binary, Buff1/binary>>)
     end}.

%% Wraps the outcome of decode_frame_sequence/3 for a given serial: an
%% unfinished sequence stays a continuation, a finished one is put in
%% front of whatever can be decoded from the remaining bytes.
continue_decode_frame_sequences(Serial, {cont, Cont}) ->
    {cont,
     fun(Buff1) ->
	     continue_decode_frame_sequences(Serial, Cont(Buff1))
     end};
continue_decode_frame_sequences(Serial, {Arr, Tail2}) ->
    case decode_frame_sequences(Tail2) of
	{cont, _} = Cont -> { [ {Serial, Arr} ], Cont};
	{Arrs, Cont}     -> { [ {Serial, Arr} | Arrs], Cont}
    end.



%% Reads N length-prefixed frames (32-bit size followed by that many
%% bytes). Returns {Frames, Rest} once all N frames are available, or
%% {cont, Fun} where Fun takes the next chunk of bytes and resumes with
%% the frames collected so far.
decode_frame_sequence(0, Buff, Acc) ->
    {lists:reverse(Acc), Buff};
decode_frame_sequence(N, <<Size:32, Tail/binary>> = Buff, Acc) ->
    case Tail of
	<<F:Size/binary, Tail1/binary>> ->
	    decode_frame_sequence(N - 1, Tail1, [F|Acc]);
	_ ->
	    %% Frame body incomplete: retry from the size header once
	    %% more data has arrived.
	    {cont, fun(Buff1) ->
			   decode_frame_sequence(
			     N, <<Buff/binary, Buff1/binary>>, Acc)
		   end}
    end;
decode_frame_sequence(N, Buff, Acc) ->
    %% Not even a complete size header yet.
    {cont, fun(Buff1) ->
		   decode_frame_sequence(
		     N, <<Buff/binary, Buff1/binary>>, Acc)
	   end}.


decode_handshake_response(Buff) ->
    eavro:decode(schema_HandshakeResponse(), Buff).

decode_handshake_request(Buff) ->
    eavro:decode(schema_HandshakeRequest(), Buff).



%%
%% Avro Protocol Handshake Schemas
%%

schema_HandshakeRequest() ->
    #avro_record{
       name = 'HandshakeRequest',
       fields = [{clientHash, schema_MD5()},
		 {clientProtocol, [null, string]},
		 {serverHash, schema_MD5()},
		 {meta, [null,schema_Meta()]}]}.

schema_HandshakeResponse() ->
    #avro_record{
       name = 'HandshakeResponse',
       fields = [{match, #avro_enum{ name = 'HandshakeMatch',
				     symbols = ['BOTH', 'CLIENT', 'NONE']}},
		 {serverProtocol, [null, string]},
		 {serverHash, [null, schema_MD5()]},
		 {meta, [null, schema_Meta()]}]}.

schema_Meta() ->
    #avro_map{ values = bytes }.

schema_MD5() ->
    #avro_fixed{ name = 'MD5', size = 16}.

--------------------------------------------------------------------------------
/src/eavro_rpc_srv.erl:
--------------------------------------------------------------------------------
-module(eavro_rpc_srv).

-behaviour(gen_server).
-behaviour(ranch_protocol).

-include("eavro.hrl").

%% API
-export([start/4, start_link/4]).

%% gen_server callbacks
-export([init/1,
	 handle_call/3,
	 handle_cast/2,
	 handle_info/2,
	 terminate/2,
	 code_change/3]).

-define(TIMEOUT, 30000).

-record(state, {socket,
		transport,
		proto  :: #avro_proto{},
		cont,
		serial = 0,
		handshaked = false,
		c_module :: atom(),
		c_state  :: any()}).

%% Start an Avro RPC Server. The callback module
%% defines which protocol is served and how.
%% The callback module must implement the
%% 'eavro_rpc_handler' behaviour.
%% See also 'eavro_rpc_handler.erl'.
-spec start(CallbackModule   :: module(),
	    CallbackOpts     :: any(),
	    ListenPort       :: non_neg_integer(),
	    AcceptorPoolSize :: non_neg_integer()) ->
		   {ok, pid()} | {error, badarg}.
start(CallbackModule,
      CallbackOpts,
      Port,
      NumAcceptors) when is_atom(CallbackModule),
			 is_integer(Port),
			 is_integer(NumAcceptors) ->
    %% ranch may already be running; any other start failure crashes here.
    case application:start(ranch) of
	{error, {already_started, ranch}} -> ok;
	ok -> ok
    end,
    Proto = CallbackModule:get_protocol(),
    TransportOpts = [{port, Port}],
    ProtocolOpts = [{proto, Proto},
		    {callback_module, CallbackModule},
		    {callback_opts, CallbackOpts}],
    ranch:start_listener(?MODULE, NumAcceptors,
			 ranch_tcp, TransportOpts,
			 ?MODULE, ProtocolOpts).

%% Ranch Protocol Handler Behaviour Callback Function
start_link(Ref, Socket, Transport, Opts) ->
    InitArg = {Ref, Socket, Transport, Opts},
    proc_lib:start_link(?MODULE, init, [InitArg]).

%% gen_server.
%% Runs in the process spawned by start_link/4: initialises the handler,
%% acknowledges the start, takes over the socket from ranch and enters
%% the gen_server loop with a fresh frame decoder.
init({Ref, Socket, Transport, Opts}) ->
    #avro_proto{} = Proto =
	proplists:get_value(proto, Opts),
    {CModule, CState} = callback_init(Opts),
    ok = proc_lib:init_ack({ok, self()}),
    ok = ranch:accept_ack(Ref),
    ok = Transport:setopts(Socket, [{active, once}]),
    {cont, Cont} = eavro_rpc_proto:decode_frame_sequences(<<>>),
    InitialState = #state{ socket    = Socket,
			   transport = Transport,
			   proto     = Proto,
			   cont      = Cont,
			   c_module  = CModule,
			   c_state   = CState},
    gen_server:enter_loop(?MODULE, [], InitialState, ?TIMEOUT).

%% Incoming bytes are fed to the frame decoder. Every completed sequence
%% is answered; the very first answer additionally carries the handshake
%% response in front of the call result.
%% NOTE(review): only enter_loop/4 above passes ?TIMEOUT; the
%% {noreply, State} results below carry no timeout -- confirm whether an
%% idle timeout after the first packet is intended.
handle_info({tcp, Soc, Data},
	    #state{ socket = Soc, cont = Decode } = State) ->
    active_once(State),
    case Decode(Data) of
	{cont, Decode1} ->
	    {noreply, State#state{ cont = Decode1 }};
	{ [{_Serial, [_ | _]} | _] = Sequences, {cont, Decode1} } ->
	    Replies = answer_sequences(Sequences, State),
	    lists:foreach(
	      fun({Ser, Fs}) ->
		      tcp_reply(State, make_frame_sequence(Ser, Fs))
	      end,
	      Replies),
	    {noreply, State#state{ handshaked = true, cont = Decode1 }}
    end;
handle_info({tcp_closed, _Socket}, State) ->
    {stop, normal, State};
handle_info({tcp_error, _, Reason}, State) ->
    {stop, Reason, State};
handle_info(timeout, State) ->
    {stop, normal, State};
handle_info(_Info, State) ->
    {stop, normal, State}.

%% Produces the reply frames for a batch of decoded sequences. Before
%% the handshake has happened the first frame starts with the handshake
%% request, so it is stripped off and the handshake response is put in
%% front of the first reply.
answer_sequences([{Ser0, [HeadFrame | Frames]} | Sequences] = AsIs,
		 #state{ proto = Proto, handshaked = Handshaked } = State) ->
    case Handshaked of
	false ->
	    { _HSReq, HeadFrameTail } =
		eavro_rpc_proto:decode_handshake_request(HeadFrame),
	    HRes = eavro_rpc_proto:encode_handshake_response(Proto),
	    [{Ser0, Fs0} | Seqs] =
		do_calls(
		  [{Ser0, [HeadFrameTail | Frames]} | Sequences],
		  State),
	    [{Ser0, [HRes | Fs0]} | Seqs];
	true ->
	    do_calls(AsIs, State)
    end.

%% Decodes and executes one call per sequence, in order. Each sequence
%% must contain exactly one call and nothing else.
do_calls([], _State) ->
    [];
do_calls([ {Ser, Frames} | Rest],
	 #state{ proto = Proto } = State) ->
    Payload = iolist_to_binary(Frames),
    {{_MsgSchema, _Args} = Call, Trailing} =
	eavro_rpc_proto:decode_call(Proto, Payload),
    <<>> = iolist_to_binary(Trailing),
    Encoded = do_call(Call, State),
    [{Ser, [Encoded]} | do_calls(Rest, State)].

%% Runs the handler callback and encodes its result. A bad return value,
%% a handler exception or an encoding failure is turned into an error
%% response carrying a textual description.
do_call({#avro_message{} = MsgSchema, _Args} = Call,
	#state{ c_module = CMod,
		c_state  = CState } = _State) ->
    Outcome =
	try CMod:handle_call(Call, CState) of
	    {ok, _} = Ok ->
		Ok;
	    {error, _} = Err ->
		Err;
	    Other ->
		{error, server_error("Server error[1]: ~p",
				     {handler_bad_ret, Other})}
	catch
	    _:Reason ->
		{error, server_error("Server error[2]: ~p",
				     {handler_exit, Reason})}
	end,
    try
	eavro_rpc_proto:encode_response(MsgSchema, Outcome)
    catch
	_:EncReason ->
	    eavro_rpc_proto:encode_response(
	      MsgSchema,
	      {error, server_error("Server error[3]: ~p",
				   {handler_result_encode_failed, EncReason})})
    end.

%% Formats Term with the given single-placeholder format into a binary.
server_error(Format, Term) ->
    iolist_to_binary(io_lib:format(Format, [Term])).

%% --------------------------------------------------------------------
%% Unused behaviour callbacks
%% --------------------------------------------------------------------
handle_call(_Request, _From, State) ->
    {reply, ok, State}.

handle_cast(_Msg, State) ->
    {noreply, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% --------------------------------------------------------------------
%% Private
%% --------------------------------------------------------------------

%% Initialises the handler module named in the listener options.
%% Exits with no_callback_module when none was configured.
callback_init(Opts) ->
    case proplists:get_value(callback_module, Opts) of
	undefined ->
	    exit(no_callback_module);
	Module ->
	    InitArgs = proplists:get_value(callback_opts, Opts, []),
	    {ok, HandlerState} = Module:init(InitArgs),
	    {Module, HandlerState}
    end.


%% Re-arms the socket for exactly one more message.
active_once(#state{ socket = Socket, transport = Transport }) ->
    Transport:setopts(Socket, [{active, once}]).

tcp_reply(#state{ socket = Socket, transport = Transport }, Data) ->
    Transport:send(Socket, Data).

-spec make_frame( Data :: iolist()) -> iolist().
%% Prefixes Data with its byte size as a 32-bit big-endian integer.
make_frame(Data) ->
    Size = iolist_size(Data),
    [<<Size:32>>, Data].

%% Builds one frame sequence: a 32-bit serial, a 32-bit frame count and
%% then every encoded call wrapped by make_frame/1.
make_frame_sequence(Serial, EncodedCalls) when is_integer(Serial),
                                               is_list(EncodedCalls) ->
    SequenceLength = length(EncodedCalls),
    [<<Serial:32, SequenceLength:32>> |
     [ make_frame(Call) || Call <- EncodedCalls ] ].

--------------------------------------------------------------------------------
/src/eavro_zcodec.erl:
--------------------------------------------------------------------------------
-module(eavro_zcodec).


%% API
-export([ %encode/2,
	  decode/2,
	  decode/3, decode_seq/4 ]).

-export([ varint_encode/1,
	  varint_decode/2]).

-include("eavro.hrl").

%% ?B(V,S): binary pattern binding V to a binary of exactly S bytes.
-define(B(V,S), <<V:S/binary>>).
%% ?EXPAND(Tail): forces a lazy tail (a 0-arity fun) and passes a plain
%% list through unchanged.
-define(EXPAND(Tail), if is_function(Tail, 0) ->
			      Tail();
			 is_list(Tail) -> Tail end).

%%
%% Decoding functions
%%

decode(Type, Buff ) ->
    decode(Type, Buff, undefined).

-spec decode( Type :: avro_type(),
	      Buff :: zlists:zlist(binary()),
	      Hook :: undefined | decode_hook() ) ->
		    { Value :: term(), Buff :: zlists:zlist(binary()) }.

%% Decodes one value of the given type from the head of a lazy list of
%% binaries and returns {Value, RemainingBuffer}. Every decoded value is
%% passed through decode_hook/3 before it is returned.
decode(#avro_record{fields = Fields} = Type, Buff, Hook) ->
    %% Fields are decoded in declaration order.
    {FieldsValues, Buff2} =
	lists:foldl(
	  fun({_FName, FType}, {Vals0, Buff0}) ->
		  {Val, Buff1} = decode(FType, Buff0, Hook),
		  {[ Val | Vals0 ], Buff1}
	  end, {[], Buff}, Fields),
    { decode_hook(Hook, Type, lists:reverse(FieldsValues) ), Buff2};
decode(#avro_enum{symbols=Symbols} = Type, Buff, Hook) ->
    {ZeroBasedIndex, Buff1} = decode(int, Buff, Hook),
    Symbol = lists:nth(ZeroBasedIndex + 1, Symbols),
    { decode_hook(Hook, Type, Symbol ), Buff1};
decode(#avro_map{values=Type} = CType, Buff, Hook) ->
    decode_blocks(CType, Type, [], Buff, Hook, fun map_entry_decoder/3);
decode(#avro_array{items=Type} = CType, Buff, Hook) ->
    decode_blocks(CType, Type, [], Buff, Hook, fun decode/3);
decode(#avro_fixed{size=Size}=Type, Buff, Hook) ->
    [?B(Val,Size) | Tail] = expand_binary(Buff, Size),
    {decode_hook(Hook, Type, Val), ?EXPAND(Tail)};
decode(Type, Buff, Hook) when Type == string orelse Type == bytes ->
    %% The length prefix is decoded without the hook.
    {ByteSize, Buff1} = decode(long, Buff, undefined),
    [?B(String,ByteSize) | Buff2] = expand_binary(Buff1,ByteSize),
    {decode_hook(Hook, Type, String), ?EXPAND(Buff2)};
decode(int = Type, Buff, Hook) ->
    %% varint_decode/2 yields the zig-zag value as a 32-bit binary.
    {<<Z:32>>, Buff1} = varint_decode(int, Buff),
    Int = zigzag_decode(int, Z),
    {decode_hook(Hook, Type, Int), ?EXPAND(Buff1)};
decode(long = Type, Buff, Hook) ->
    %% Same as int, with a 64-bit zig-zag value.
    {<<Z:64>>, Buff1} = varint_decode(long, Buff),
    Long = zigzag_decode(long, Z),
    {decode_hook(Hook, Type, Long), ?EXPAND(Buff1)};
decode(float = Type, Buff, Hook) ->
    %% Avro writes floats as 4 little-endian bytes.
    [ <<Float:32/little-float>> | Buff1] = expand_binary(Buff,4),
    {decode_hook(Hook, Type, Float), ?EXPAND(Buff1)};
decode(double = Type, Buff, Hook) ->
    %% Avro writes doubles as 8 little-endian bytes.
    [<<Double:64/little-float>> | Buff1] = expand_binary(Buff,8),
    {decode_hook(Hook, Type, Double), ?EXPAND(Buff1)};
decode(boolean = Type, Buff, Hook) ->
    [<<0:7,B:1>> | Buff1] = expand_binary(Buff,1),
    {decode_hook(Hook, Type, case B of 0 -> false; 1 -> true end), ?EXPAND(Buff1)};
decode(null = Type, Buff, Hook) ->
    {decode_hook(Hook, Type, <<>>), Buff};
decode([_ | _] = Union, Buff, Hook) ->
    %% A union is any non-empty list of types. Its first branch may be a
    %% complex type (e.g. [#avro_map{}, null]), so the head is not
    %% required to be an atom. The hook is not applied to the union
    %% value itself, only to the branch value.
    {Idx, Buff1} = decode(long, Buff),
    Type = lists:nth(Idx + 1, Union),
    {Val, Buff2} = decode(Type, Buff1, Hook),
    { if Type == null -> null;
	 true -> {Type, Val}
      end, Buff2}.


%% Item decoder for maps: a string key followed by a value of Type.
%% Keys are decoded without the hook.
map_entry_decoder(Type, Buff, Hook) ->
    {K, Buff1} = decode(string,Buff),
    {V, Buff2} = decode(Type,Buff1,Hook),
    { {K, V}, Buff2}.

%% Decodes the blocks of an array or map until the terminating empty
%% block. The collection value handed to the hook is the list of blocks
%% (a list of lists of items).
%% NOTE(review): blocks are prepended to the accumulator and never
%% reversed, so with more than one block they come out last-first --
%% confirm whether callers rely on this.
decode_blocks(CollectionType, ItemType, Blocks, Buff, Hook, ItemDecoder)->
    %% Decode block item count
    {Count_, Buff1} = decode(long, Buff),
    %% Analyze count: there is a special behavior for count < 0
    {Count, Buff2} =
	if Count_ < 0 ->
		%% When count < 0 it is followed by the block size in
		%% bytes, which is not used here.
		{_BlockSize, Buff_} = decode(long, Buff1),
		{-Count_, Buff_};
	   true ->
		{Count_, Buff1}
	end,
    %% Decode block items
    {Block, Buff3} = decodeN(Count, ItemType, Buff2, Hook, ItemDecoder),
    case Block of
	[] ->
	    {decode_hook(Hook, CollectionType, Blocks), Buff3};
	_ ->
	    decode_blocks(CollectionType, ItemType,[Block|Blocks],Buff3,Hook,ItemDecoder)
    end.

%% Lazily decodes a sequence of values of Type: returns [] at the end of
%% input, otherwise [Value | Fun] where Fun() yields the rest.
decode_seq(_Type, _Hook, _Decoder, [<<>>]) ->
    [];
decode_seq(_Type, _Hook, _Decoder, []) ->
    [];
decode_seq(Type, Hook, Decoder, ZBytes) ->
    {H, ZBytes1} = Decoder(Type, ZBytes, Hook),
    [H | fun() -> decode_seq(Type, Hook, Decoder, ?EXPAND(ZBytes1)) end].

%% Decodes exactly N items with Decoder, preserving their order.
decodeN(0, _Type, Buff, _Hook, _Decoder) ->
    {[], Buff};
decodeN(N, Type, Buff, Hook, Decoder) ->
    {H, Buff1} = Decoder(Type, Buff, Hook),
    {Tail, Buff2} = decodeN(N - 1 , Type, Buff1, Hook, Decoder),
    {[ H | Tail ], Buff2}.

%% Applies the optional user hook to a decoded value.
decode_hook(undefined, _Type, Val) ->
    Val;
decode_hook(Hook, Type, Val) when is_function(Hook,2) ->
    Hook(Type, Val).

%% Internal functions

%% ZigZag encode/decode
%% https://developers.google.com/protocol-buffers/docs/encoding?&csw=1#types
zigzag_decode(int, ZigInt) ->
    (ZigInt bsr 1) bxor -(ZigInt band 1);
zigzag_decode(long, ZigInt) ->
    (ZigInt bsr 1) bxor -(ZigInt band 1).


%% Variable-length format
%% http://lucene.apache.org/core/3_5_0/fileformats.html#VInt
%%
%% The input is a fixed-width big-endian binary (32 or 64 bits). It is
%% cut into 7-bit groups; the groups are emitted least significant first
%% and every byte except the last has its top bit set. Leading all-zero
%% groups are dropped.

%% 32 bit encode
varint_encode(<<0:32>>) -> <<0>>;
varint_encode(<<0:25, B1:7>>) -> <<B1>>;
varint_encode(<<0:18, B1:7, B2:7>>) ->
    <<1:1, B2:7, B1>>;
varint_encode(<<0:11, B1:7, B2:7, B3:7>>) ->
    <<1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:4, B1:7, B2:7, B3:7, B4:7>>) ->
    <<1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<B1:4, B2:7, B3:7, B4:7, B5:7>>) ->
    <<1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;

%% 64 bit encode
varint_encode(<<0:64>>) -> <<0>>;
varint_encode(<<0:57, B1:7>>) -> <<B1>>;
varint_encode(<<0:50, B1:7, B2:7>>) ->
    <<1:1, B2:7, B1>>;
varint_encode(<<0:43, B1:7, B2:7, B3:7>>) ->
    <<1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:36, B1:7, B2:7, B3:7, B4:7>>) ->
    <<1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:29, B1:7, B2:7, B3:7, B4:7, B5:7>>) ->
    <<1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:22, B1:7, B2:7, B3:7, B4:7, B5:7, B6:7>>) ->
    <<1:1, B6:7, 1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:15, B1:7, B2:7, B3:7, B4:7, B5:7, B6:7, B7:7>>) ->
    <<1:1, B7:7, 1:1, B6:7, 1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:8, B1:7, B2:7, B3:7, B4:7, B5:7, B6:7, B7:7, B8:7>>) ->
    <<1:1, B8:7, 1:1, B7:7, 1:1, B6:7, 1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<0:1, B1:7, B2:7, B3:7, B4:7, B5:7, B6:7, B7:7, B8:7, B9:7>>) ->
    <<1:1, B9:7, 1:1, B8:7, 1:1, B7:7, 1:1, B6:7, 1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>;
varint_encode(<<B1:1, B2:7, B3:7, B4:7, B5:7, B6:7, B7:7, B8:7, B9:7, B10:7>>) ->
    <<1:1, B10:7, 1:1, B9:7, 1:1, B8:7, 1:1, B7:7, 1:1, B6:7, 1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1, B2:7, B1>>.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Decodes one varint from the head of a lazy buffer. Ten bytes are
%% requested from the buffer because that is the longest encoding
%% handled below (a full-width long). Returns the value as a 32-bit
%% (int) or 64-bit (long) binary together with the remaining buffer.
varint_decode(Type, ZBuff) ->
    [Buff | Tail] = expand_binary(ZBuff, 10),
    {Vint, Buff1} = varint_decode_(Type, Buff),
    {Vint, [Buff1 | Tail]}.

%% Full-width encodings (5 bytes for int, 10 bytes for long) are matched
%% directly; shorter ones are decoded group by group and left-padded
%% with zero bits to the type width.
varint_decode_(int, <<1:1, B5:7, 1:1, B4:7, 1:1, B3:7, 1:1,
		      B2:7, 0:4, B1:4, Bytes/binary>>) ->
    {<<B1:4, B2:7, B3:7, B4:7, B5:7>>, Bytes};
varint_decode_(long, <<1:1, B10:7, 1:1, B9:7, 1:1, B8:7, 1:1, B7:7,
		       1:1, B6:7, 1:1, B5:7, 1:1, B4:7, 1:1, B3:7,
		       1:1, B2:7, 0:7, B1:1, Bytes/binary>>) ->
    {<<B1:1, B2:7, B3:7, B4:7, B5:7, B6:7, B7:7, B8:7, B9:7, B10:7>>, Bytes};
varint_decode_(Type, Bytes) ->
    {DecBits, RestBytes} = varint_decode(Bytes),
    Base = case Type of
	       int  -> 32;
	       long -> 64
	   end,
    LeadingZeroBits = Base - bit_size(DecBits),
    {<<0:LeadingZeroBits/integer, DecBits/bitstring>>, RestBytes}.

%% Collects the 7-bit groups of a varint into a bitstring, most
%% significant group first. A byte with a clear top bit ends the number.
varint_decode(<<0:1, X:7, Bytes/binary>>) ->
    {<<X:7>>, Bytes};
varint_decode(<<1:1,X:7, Bytes/binary>>) ->
    {DecBits, Bytes1} = varint_decode(Bytes),
    %% Later bytes are more significant, so they go in front.
    {<<DecBits/bitstring, X:7>>, Bytes1}.

%% Forces the lazy buffer if needed and delegates to
%% zlists_file:expand_binary/2 for Size bytes.
expand_binary(ZBuff, Size) ->
    zlists_file:expand_binary(?EXPAND(ZBuff), Size).
205 | -------------------------------------------------------------------------------- /test/data/eavro-rpc-test-servers-1.7.5-SNAPSHOT-jar-with-dependencies.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SIfoxDevTeam/eavro/065e4766592b83f52395b500ac2afc624439e5bf/test/data/eavro-rpc-test-servers-1.7.5-SNAPSHOT-jar-with-dependencies.jar -------------------------------------------------------------------------------- /test/data/flume.avdl: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | @namespace("org.apache.flume.source.avro") 21 | 22 | protocol AvroSourceProtocol { 23 | 24 | enum Status { 25 | OK, FAILED, UNKNOWN 26 | } 27 | 28 | record AvroFlumeEvent { 29 | map headers; 30 | bytes body; 31 | } 32 | 33 | Status append( AvroFlumeEvent event ); 34 | 35 | Status appendBatch( array events ); 36 | 37 | } 38 | -------------------------------------------------------------------------------- /test/data/flume.avpr: -------------------------------------------------------------------------------- 1 | { 2 | "doc": "* Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. 
See the License for the\n * specific language governing permissions and limitations\n * under the License.", 3 | "messages": { 4 | "append": { 5 | "request": [ 6 | { 7 | "name": "event", 8 | "type": "AvroFlumeEvent" 9 | } 10 | ], 11 | "response": "Status" 12 | }, 13 | "appendBatch": { 14 | "request": [ 15 | { 16 | "name": "events", 17 | "type": { 18 | "items": "AvroFlumeEvent", 19 | "type": "array" 20 | } 21 | } 22 | ], 23 | "response": "Status" 24 | } 25 | }, 26 | "namespace": "org.apache.flume.source.avro", 27 | "protocol": "AvroSourceProtocol", 28 | "types": [ 29 | { 30 | "name": "Status", 31 | "symbols": [ 32 | "OK", 33 | "FAILED", 34 | "UNKNOWN" 35 | ], 36 | "type": "enum" 37 | }, 38 | { 39 | "fields": [ 40 | { 41 | "name": "headers", 42 | "type": { 43 | "type": "map", 44 | "values": "string" 45 | } 46 | }, 47 | { 48 | "name": "body", 49 | "type": "bytes" 50 | } 51 | ], 52 | "name": "AvroFlumeEvent", 53 | "type": "record" 54 | } 55 | ] 56 | } 57 | -------------------------------------------------------------------------------- /test/data/issue_11.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "namespace": "com.example.foo", 4 | "name": "Click", 5 | "fields": [ 6 | {"name": "url", "type": "string"}, 7 | {"name": "clicked_at", "type": "string"}, 8 | {"name": "remote_ip","type": "string"}, 9 | {"name": "http_headers", "type": [{"type": "map", "values": "string"}, "null"]}, 10 | {"name": "request_type", "type": ["string", "null"]}, 11 | {"name": "destination", "type": "string"} 12 | ] 13 | } -------------------------------------------------------------------------------- /test/data/mail.avpr: -------------------------------------------------------------------------------- 1 | {"namespace": "example.proto", 2 | "protocol": "Mail", 3 | 4 | "types": [ 5 | {"name": "Message", "type": "record", 6 | "fields": [ 7 | {"name": "to", "type": "string"}, 8 | {"name": "from", "type": "string"}, 9 | 
{"name": "body", "type": "string"} 10 | ] 11 | } 12 | ], 13 | 14 | "messages": { 15 | "send": { 16 | "request": [{"name": "message", "type": "Message"}], 17 | "response": "string" 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /test/data/transformer.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type" : "record", 3 | "name" : "transformer_schema", 4 | "namespace" : "vvorobyov", 5 | "fields" : 6 | [ { "name" : "id", "type" : { "name" : "ID", "type" : "fixed", "size" : 4} }, 7 | { "name" : "fname", "type" : "string" }, 8 | { "name" : "lname", "type" : "string" }, 9 | { "name" : "age", "type" : "int" }, 10 | { "name" : "is_autobot", "type" : "boolean" }, 11 | { "name" : "location", 12 | "type" : {"name" : "Location", 13 | "type" : "enum", 14 | "symbols" : 15 | ["Earth","Moon","March","Venus","Jupiter", 16 | "Mercury","Titan","Io","Europe","Ganimed", 17 | "Callisto","Pluton"] 18 | } 19 | }, 20 | {"name" : "equipment", 21 | "type" : 22 | { "type": "map", 23 | "values": { "name": "Equipment", 24 | "type" : "record", 25 | "fields" : [{ "name" : "name", "type" : "string" }, 26 | { "name" : "weight", "type" : "int" } ] 27 | } 28 | } 29 | }, 30 | {"name" : "friends", 31 | "type" : { "type" : "array", 32 | "items" : { "name" : "Ref", "type" : "fixed", "size" : 4} }}, 33 | {"name" : "energy_level", "type" : ["int", "string"]} 34 | ], 35 | "doc" : "A basic schema for storing Transformers" 36 | } 37 | -------------------------------------------------------------------------------- /test/data/transformers-deflated.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SIfoxDevTeam/eavro/065e4766592b83f52395b500ac2afc624439e5bf/test/data/transformers-deflated.avro -------------------------------------------------------------------------------- /test/data/transformers-deflated2.avro: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SIfoxDevTeam/eavro/065e4766592b83f52395b500ac2afc624439e5bf/test/data/transformers-deflated2.avro -------------------------------------------------------------------------------- /test/data/transformers.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SIfoxDevTeam/eavro/065e4766592b83f52395b500ac2afc624439e5bf/test/data/transformers.avro -------------------------------------------------------------------------------- /test/data/transformers.json: -------------------------------------------------------------------------------- 1 | {"id": "0000", "fname" : "Optimus", "lname" : "Prime", "age" : 1000, "is_autobot" :true, "location": "Earth", "equipment" : {"weapon" : { "name": "SuperBlaster", "weight" : 33 } }, "friends" : [ "0001", "0002"], "energy_level" : { "int" : 234} } 2 | {"id": "0001", "fname" : "Nexus", "lname" : "Prime", "age" : 1001, "is_autobot" :true, "location": "Moon" , "equipment" : {"weapon" : { "name": "PlasmaCanon", "weight" : 100 } }, "friends" : [], "energy_level" : { "int" : 234}} 3 | {"id": "0002", "fname" : "Zeta", "lname" : "Prime", "age" : 2000, "is_autobot" :true, "location": "March", "equipment" : {"weapon" : { "name": "LazerCanon", "weight" : 60 } } , "friends" : [], "energy_level" : { "int" : 234}} 4 | {"id": "0003", "fname" : "Rodmus", "lname" : "Prime", "age" : 1000, "is_autobot" :true, "location": "Venus", "equipment" : {"weapon" : { "name": "RocketLauncher", "weight" : 200 } } , "friends" : [], "energy_level" : { "int" : 234}} 5 | {"id": "0004", "fname" : "Optimus1", "lname" : "Prime", "age" : 1000, "is_autobot" :true, "location": "Jupiter", "equipment" : {"weapon" : { "name": "Blaster", "weight" : 33 } } , "friends" : [], "energy_level" : { "int" : 234}} 6 | {"id": "0005", "fname" : "Nexus1", "lname" : "Prime", "age" : 1001, "is_autobot" :true, 
"location": "Mercury", "equipment" : {"weapon" : { "name": "Blaster", "weight" : 33 } } , "friends" : [], "energy_level" : { "int" : 234}} 7 | {"id": "0006", "fname" : "Zeta1", "lname" : "Prime", "age" : 2000, "is_autobot" :true, "location": "Titan", "equipment" : {"weapon" : { "name": "Blaster", "weight" : 33 } } , "friends" : [], "energy_level" : {"string" : "very-much"} } 8 | {"id": "0007", "fname" : "Rodmus1", "lname" : "Prime", "age" : 1000, "is_autobot" :true, "location": "Io", "equipment" : {"weapon" : { "name": "Blaster", "weight" : 33 } } , "friends" : [], "energy_level" : { "int" : 234}} 9 | {"id": "0008", "fname" : "Optimus2", "lname" : "Prime", "age" : 1000, "is_autobot" :true, "location": "Europe", "equipment" : {"weapon" : { "name": "Blaster", "weight" : 33 } } , "friends" : [], "energy_level" : { "int" : 234}} 10 | {"id": "0009", "fname" : "Nexus2", "lname" : "Prime", "age" : 1001, "is_autobot" :true, "location": "Ganimed", "equipment" : {"weapon" : { "name": "Blaster", "weight" : 33 } } , "friends" : [], "energy_level" : { "int" : 234}} 11 | {"id": "000A", "fname" : "Zeta3", "lname" : "Prime", "age" : 2000, "is_autobot" :true, "location": "Callisto", "equipment" : {"weapon" : { "name": "NuclearGun", "weight" : 70 } } , "friends" : [], "energy_level" : { "int" : 234}} 12 | {"id": "000B", "fname" : "Rodmus3", "lname" : "Prime", "age" : 1000, "is_autobot" :true, "location": "Pluton", "equipment" : {"weapon" : { "name": "ElectroHammer", "weight" : 180 } } , "friends" : [], "energy_level" : {"string": "infinity"} } 13 | -------------------------------------------------------------------------------- /test/data/twitter.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SIfoxDevTeam/eavro/065e4766592b83f52395b500ac2afc624439e5bf/test/data/twitter.avro -------------------------------------------------------------------------------- /test/data/twitter.avsc: 
--------------------------------------------------------------------------------
{
  "type" : "record",
  "name" : "twitter_schema",
  "namespace" : "com.miguno.avro",
  "fields" : [ {
    "name" : "username",
    "type" : "string",
    "doc" : "Name of the user account on Twitter.com"
  }, {
    "name" : "tweet",
    "type" : "string",
    "doc" : "The content of the user's Twitter message"
  }, {
    "name" : "timestamp",
    "type" : "long",
    "doc" : "Unix epoch time in seconds"
  } ],
  "doc:" : "A basic schema for storing Twitter messages"
}

--------------------------------------------------------------------------------
/test/eavro_codec_tests.erl:
--------------------------------------------------------------------------------
-module(eavro_codec_tests).

-include("eavro.hrl").

-include_lib("eunit/include/eunit.hrl").

%%=========================================================================================
%% TESTS
%%=========================================================================================

%% Round-trips both branches of a [string, null] union through
%% eavro_codec:encode/2 and eavro_codec:decode/2 and expects the tagged value
%% back with no bytes left over.
'ISSUE_11_2nd_part_test'() ->
    Union = [string, null],
    EncodedNull = eavro_codec:encode(Union, null),
    ?assertMatch({null, <<>>}, eavro_codec:decode(Union, EncodedNull)),
    EncodedString = eavro_codec:encode(Union, {string, <<"Ho-ho-ho!">>}),
    ?assertMatch({{string, <<"Ho-ho-ho!">>}, <<>>},
                 eavro_codec:decode(Union, EncodedString)).

%% Generates one encode/decode round-trip case per sample produced by
%% generate_int32_test_values/0 (checked as 'int') and
%% generate_int64_test_values/0 (checked as 'long'). Each case expects the
%% original integer back with an empty remainder.
integer_codec_test_() ->
    Generators = [{int, 32, fun generate_int32_test_values/0},
                  {long, 64, fun generate_int64_test_values/0}],
    {inorder,
     [{caption("Encode/Decode ~p as '~s'.", [N, Type]),
       fun() ->
               ?assertMatch(
                  {N, <<>>},
                  eavro_codec:decode(Type, eavro_codec:encode(Type, N)))
       end}
      || {Type, Base, Gen} <- Generators,
         %% NOTE(review): this pattern was lost in extraction (it appeared as a
         %% bare `<>`). It is reconstructed from the otherwise unused N and
         %% Base; the signed interpretation is an assumption -- confirm it
         %% against the upstream file.
         <<N:Base/signed-integer>> <- Gen()]}.

%% Checks eavro_codec:varint_encode/1 against eavro_codec:varint_decode/2.
%% The four fixed cases use values whose top bits are set and additionally pin
%% the encoded size (5 bytes for 32-bit input, 10 bytes for 64-bit input); the
%% generated cases only check that decoding returns the original binary with
%% nothing left over.
varint_codec_test_() ->
    {inorder,
     [{"Check varint codec of int32 when no leading zeroes.",
       fun() ->
               FF32 = 16#FFFFFFFF,
               %% The `<<FF32:32>>` binaries were lost in extraction (shown as
               %% `<>`); they are rebuilt from the FF32 binding above.
               EncBytes = <<_:5/binary>> = eavro_codec:varint_encode(<<FF32:32>>),
               {<<FF32:32>>, <<>>} = eavro_codec:varint_decode(int, EncBytes)
       end},
      {"Check varint codec of int64 when no leading zeroes.",
       fun() ->
               FF64 = 16#FFFFFFFFFFFFFFFF,
               %% Same reconstruction as above, for the 64-bit value.
               EncBytes = <<_:10/binary>> = eavro_codec:varint_encode(<<FF64:64>>),
               {<<FF64:64>>, <<>>} = eavro_codec:varint_decode(long, EncBytes)
       end},
      {"Check varint codec of int32 when no leading zeroes with some irregularity in first byte.",
       fun() ->
               Int32 = <<2#10101110:8, 16#FFFFFF:24>>,
               EncBytes = <<_:5/binary>> = eavro_codec:varint_encode(Int32),
               {Int32, <<>>} = eavro_codec:varint_decode(int, EncBytes)
       end},
      {"Check varint codec of int64 when no leading zeroes with some irregularity in first byte.",
       fun() ->
               Int64 = <<2#10101110:8, 16#FFFFFFFFFFFFFFFF:56>>,
               EncBytes = <<_:10/binary>> = eavro_codec:varint_encode(Int64),
               {Int64, <<>>} = eavro_codec:varint_decode(long, EncBytes)
       end}] ++
         [{caption("Check varint codec of int32: ~p.", [Int32]),
           fun() ->
                   EncBytes = eavro_codec:varint_encode(Int32),
                   {Int32, <<>>} = eavro_codec:varint_decode(int, EncBytes)
           end} || Int32 <- generate_int32_test_values()] ++
         [{caption("Check varint codec of int64: ~p.", [Int64]),
           fun() ->
                   EncBytes = eavro_codec:varint_encode(Int64),
                   {Int64, <<>>} = eavro_codec:varint_decode(long, EncBytes)
           end} || Int64 <- generate_int64_test_values()]}.

%%
%% Test app domain record.
%%
-record('Person', { id, fname, lname, age, is_autobot,
                    is_desepticon, energy_level, home, clan}).
-record('GalaxyCoord', {x,y,z}).

%% Round-trip test for a record schema that nests another record and an enum.
%% The actual check lives in check_person_round_trip/0.
avro_record_codec_test_() ->
    Title = "Encode/Decode avro record with all types of fields and nested avro record.",
    {inorder, [{Title, fun check_person_round_trip/0}]}.

%% Encodes one 'Person' instance, decodes it with a hook that builds the
%% #'Person'{} / #'GalaxyCoord'{} records of this module (and turns strings
%% into character lists), then matches the fields the test cares about.
check_person_round_trip() ->
    CoordType = #avro_record{name = 'GalaxyCoord',
                             fields = [{x, double}, {y, double}, {z, double}]},
    ClanType = #avro_enum{name = 'Clan',
                          symbols = ['Autobots', 'Desepticons']},
    PersonType = #avro_record{
                    name = 'Person',
                    fields = [{id, long},
                              {energy_level, float},
                              {home, CoordType},
                              {fname, string},
                              {lname, string},
                              {age, int},
                              {is_autobot, boolean},
                              {is_desepticon, boolean},
                              {clan, ClanType}]},
    ToDomain =
        fun(#avro_record{name = 'Person'},
            [Id, Energy, Home, FName, LName, Age, IsAutobot, IsDesepticon, Clan]) ->
                #'Person'{id = Id, energy_level = Energy, home = Home,
                          fname = FName, lname = LName, age = Age,
                          is_autobot = IsAutobot, is_desepticon = IsDesepticon,
                          clan = Clan};
           (#avro_record{name = 'GalaxyCoord'}, [Cx, Cy, Cz]) ->
                #'GalaxyCoord'{x = Cx, y = Cy, z = Cz};
           (string, Bin) ->
                binary_to_list(Bin);
           (_OtherType, Value) ->
                Value
        end,
    Coord = [X, Y, Z] = [2342.34523,
                         675322341.422422324,
                         242252.56473457345],
    Instance = [16#AABBCCDDEEFF0011, 23423.5674,
                Coord,
                <<"Optimus">>, <<"Prime">>, 1000, true, false,
                'Autobots'],
    Encoded = eavro_codec:encode(PersonType, Instance),
    Decoded = eavro_codec:decode(PersonType, Encoded, ToDomain),
    ?assertMatch({#'Person'{fname = "Optimus",
                            lname = "Prime",
                            age = 1000,
                            home = #'GalaxyCoord'{x = X, y = Y, z = Z},
                            clan = 'Autobots',
                            is_autobot = true,
                            is_desepticon = false}, <<>>},
                 Decoded).

%% A map of longs must survive an encode/decode round trip. The decoder
%% returns the entries grouped in blocks, so they are flattened and sorted
%% before comparison.
avro_map_codec_test() ->
    MapType = #avro_map{values = long},
    Expected = lists:sort([{<<"k1">>, 1}, {<<"k2">>, 2}, {<<"k3">>, 3}]),
    Bytes = eavro_codec:encode(MapType, Expected),
    {Blocks, <<>>} = eavro_codec:decode(MapType, Bytes),
    ?assertMatch(Expected, lists:sort(lists:flatten(Blocks))).

%% A 5-byte fixed value must come back unchanged with no leftover bytes.
avro_fixed_codec_test() ->
    FixedType = #avro_fixed{size = 5},
    Value = <<0,1,2,3,4>>,
    Bytes = eavro_codec:encode(FixedType, Value),
    Result = eavro_codec:decode(FixedType, Bytes),
    ?assertMatch({Value, <<>>}, Result).

%% An array of strings must decode to a single block holding the same items.
avro_array_codec_test() ->
    ArrayType = #avro_array{items = string},
    Items = [<<"Alpha">>, <<"Beta">>, <<"Gamma">>,
             <<"Delta">>, <<"Epsilon">>, <<"Dzeta">>],
    Bytes = eavro_codec:encode(ArrayType, Items),
    Result = eavro_codec:decode(ArrayType, Bytes),
    ?assertMatch({[Items], <<>>}, Result).

%% Each branch of an [int, string] union must round-trip as a tagged value.
avro_union_codec_test() ->
    Union = [int,string],
    lists:foreach(
      fun(Tagged) ->
              Bytes = eavro_codec:encode(Union, Tagged),
              ?assertMatch({Tagged, <<>>}, eavro_codec:decode(Union, Bytes))
      end,
      [{int, 137}, {string, <<"NaN">>}]).

%% An array whose item type is a union (including a record branch) must decode
%% to one block with the same tagged items.
avro_array_of_union_codec_test() ->
    RecType = #avro_record{name = some_struct,
                           fields = [{field1, long}]},
    ArrayType = #avro_array{items = [int, string, RecType]},
    Items = [{int, 1},
             {int, 2},
             {string, <<"very much">>},
             {RecType, [137]}],
    Bytes = eavro_codec:encode(ArrayType, Items),
    ?assertMatch({[Items], <<>>}, eavro_codec:decode(ArrayType, Bytes)).

%%=========================================================================================
%% HELPER FUNCTIONS
%%=========================================================================================

%% Formats a test title and returns it as a flat character list.
caption(Fmt, Args) ->
    Title = io_lib:format(Fmt, Args),
    lists:flatten(Title).

%% 32-bit sample binaries: zero, then the byte 2#11001000 placed in each of
%% the four byte positions in turn.
generate_int32_test_values() ->
    Shifted = [<<(2#11001000 bsl (8 * Pos)):32>> || Pos <- lists:seq(0, 3)],
    [<<0:32>> | Shifted].

%% 64-bit sample binaries: zero, then the byte 2#11001000 placed in each of
%% the eight byte positions in turn.
generate_int64_test_values() ->
    Shifted = [<<(2#11001000 bsl (8 * Pos)):64>> || Pos <- lists:seq(0, 7)],
    [<<0:64>> | Shifted].

--------------------------------------------------------------------------------
/test/eavro_ocf_tests.erl:
--------------------------------------------------------------------------------
-module(eavro_ocf_tests).

-include("eavro.hrl").

-include_lib("eunit/include/eunit.hrl").

%% Decode hook: joins the blocks of a decoded array or map into one list and
%% leaves every other decoded value untouched.
std_hook(#avro_array{}, Blocks) ->
    concat_blocks(Blocks);
std_hook(#avro_map{}, Blocks) ->
    concat_blocks(Blocks);
std_hook(_Type, Value) ->
    Value.

%% Concatenates a list of blocks, keeping the element order.
concat_blocks(Blocks) ->
    [Item || Block <- Blocks, Item <- Block].

%% Reads the twitter OCF sample without a hook and checks the schema kind and
%% the two records of its single block.
parse_twitter_ocf_test() ->
    Result = eavro:read_ocf("test/data/twitter.avro"),
    ?assertMatch(
       {#avro_record{},
        [[[<<"miguno">>, <<"Rock: Nerf paper, scissors is fine.">>,
           1366150681],
          [<<"BlizzardCS">>, <<"Works as intended. Terran is IMBA.">>,
           1366154481]]]},
       Result).

%% Reads the same sample through simple_hook/2, which yields tuples with
%% character-list strings.
parse_twitter_ocf_with_hook_test() ->
    {#avro_record{}, Blocks} =
        eavro:read_ocf("test/data/twitter.avro", fun simple_hook/2),
    Tweets = lists:flatten(Blocks),
    ?assertMatch(
       [{"miguno","Rock: Nerf paper, scissors is fine.",1366150681},
        {"BlizzardCS","Works as intended. Terran is IMBA.",1366154481}],
       Tweets).

%% Reads the plain transformers OCF sample and checks that the first record of
%% the first block starts with the expected id and names.
parse_transformers_ocf_test() ->
    Data = eavro:read_ocf("test/data/transformers.avro"),
    ?assertMatch(
       {_Schema = #avro_record{},
        _Blocks =
            [_Block =
                 [ _Rec1 = [<<"0000">>, <<"Optimus">>,<<"Prime">> |_] | _ ]
            ]}, Data).

%% Reads the deflated transformers sample through the zlist-based reader,
%% expands the instances and checks the first one.
parse_transformers_ocf_deflated_test() ->
    Data = eavro_ocf_zcodec:read_ocf_with(
             "test/data/transformers-deflated.avro",
             fun(Schema, ZInstances) ->
                     { Schema, zlists:expand(ZInstances) }
             end),
    ?assertMatch(
       {_Schema = #avro_record{},
        _Instances =
            [
             _Instance = [<<"0000">>, <<"Optimus">>,<<"Prime">> |_] | _
            ]}, Data).

%% For each {Title, Source, Target, Opts} case: deletes Target, re-writes
%% Source into Target with the given writer options, then reads Target back
%% and requires at least one instance in it.
ocf_read_write_read_test_() ->
    Test =
        fun(F1, F2, Opts) ->
                %% The result is deliberately not matched; the assertion that
                %% follows is what checks that the target is gone.
                file:delete(F2),
                ?assert(not filelib:is_regular(F2)),
                ok = eavro_ocf_zcodec:read_ocf_with(
                       F1,
                       fun(Schema, ZInstances) ->
                               eavro_ocf_zcodec:write_ocf_file(
                                 F2,
                                 Schema, ZInstances, Opts)
                       end,
                       fun std_hook/2 ),
                Cnt = eavro_ocf_zcodec:read_ocf_with(
                        F2,
                        fun(_Schema, ZInstances) ->
                                zlists:count(ZInstances)
                        end),
                ?assert( Cnt > 0 )
        end,
    {inorder,
     [{Title,
       fun() -> Test(F1,F2,Opts) end} ||
         {Title, F1, F2, Opts} <-
             [{"Read plain, write plain, read result.",
               "test/data/transformers.avro",
               "test/transformers.avro",
               []},
              {"Read plain, write plain multiblocked, read multiblocked result.",
               "test/data/transformers.avro",
               "test/transformers-mult.avro",
               [{block_size, 128}]},
              {"Read deflated, write plain, read result.",
               "test/data/transformers-deflated.avro",
               "test/transformers-infl.avro",
               []},
              {"Read deflated, write deflated, read deflated result.",
               "test/data/transformers-deflated.avro",
               "test/transformers-deflated.avro",
               [{codec, deflated}]},
              {"Read deflated multiblocked, write deflated, read deflated result.",
               "test/data/transformers-deflated2.avro",
               "test/transformers-deflated2.avro",
               [{codec, deflated}]},
              {"Read deflated multiblocked, write deflated mult, read deflated result.",
               "test/data/transformers-deflated2.avro",
               "test/transformers-deflated2m.avro",
               [{codec, deflated},{block_size, 128}]}
             ] ]}.
%%
%% Private functions
%%

%% Decode hook for the twitter sample: records become 3-tuples, strings become
%% character lists, everything else is returned as decoded.
simple_hook(#avro_record{name='twitter_schema'},
            [Name, Tweet, Timestamp]) ->
    {Name, Tweet, Timestamp};
simple_hook(string, V) -> binary_to_list(V);
simple_hook(_, V) -> V.

--------------------------------------------------------------------------------
/test/eavro_rpc_proto_tests.erl:
--------------------------------------------------------------------------------
-module(eavro_rpc_proto_tests).

-include("eavro.hrl").

-include_lib("eunit/include/eunit.hrl").


%% Wraps ?assertMatch in a one-argument fun so an expectation can be stored in
%% a test table and applied to a result later.
-define(assertMatch_(Exp), fun(Val) -> ?assertMatch(Exp, Val) end).

%% Parses the Flume protocol file and checks namespace, name and that types,
%% messages and the raw JSON are all non-empty.
parse_flume_proto_test() ->
    Proto = eavro_rpc_proto:parse_protocol_file("test/data/flume.avpr"),
    ?assertMatch(
       #avro_proto{ ns = <<"org.apache.flume.source.avro">>,
                    name = <<"AvroSourceProtocol">>,
                    types = [_ | _],
                    messages = [#avro_message{} | _],
                    json = <<_,_/binary>>}, Proto).

%% Starts the Erlang RPC server and a client while the test list is being
%% built, then returns the calls to run in order: three 'send' calls from the
%% Erlang client followed by one run of the Java program with "client 2525".
call_email_eserver_test_() ->
    %% 1. Start Erlang RPC Server
    eavro_rpc_srv:start(
      eavro_rpc_test_email_handler,self(),2525,1),
    {ok, Cli} = eavro_rpc_fsm:start_link(
                  "localhost", 2525, "test/data/mail.avpr"),
    timer:sleep(100),
    %% 2. Do calls using 'eavro' erlang client
    ECalls = [{ Title,
                fun() ->
                        Ret = eavro_rpc_fsm:call(
                                Cli, send,
                                _Args = [ _Rec = [ <<"TOOOO">>, <<"FROOOOOM">>,
                                                   list_to_binary(MsgBody) ] ]),
                        RetExpect(Ret)
                end} || {Title, MsgBody, RetExpect} <-
                            [ { "e-send 'HELLO'", "HELLO", ?assertMatch_({ok, <<"OK!">>}) },
                              { "e-send 'EXIT'", "EXIT",
                                ?assertMatch_({error,
                                               {string,
                                                <<"Server error",_/binary>>} }) },
                              { "e-send 'ERROR'", "ERROR",
                                ?assertMatch_({error, {string, <<"ERROR!">>} }) } ] ],
    JCalls = [{ "j-send ",
                with_port(
                  "client 2525",
                  fun() -> ok end) }],
    {inorder, ECalls ++ JCalls}.

%% Runs the Java program with "server 65111", then calls 'send' on it from the
%% Erlang client and expects a reply starting with "Sending ".
call_email_jserver_test_() ->
    {timeout, 30,
     with_port(
       "server 65111",
       fun() ->
               {ok, P} = eavro_rpc_fsm:start_link(
                           "localhost", 65111, "test/data/mail.avpr"),
               timer:sleep(3000),
               Ret = eavro_rpc_fsm:call(
                       P, send,
                       _Args = [ _Rec = [ <<"TOOOO">>, <<"FROOOOOM">>, <<"HELLO">> ] ]),
               ?assertMatch({ok, <<"Sending ",_/binary>>}, Ret)
       end)}.

%% Returns a test fun that launches the bundled Java jar with Args, waits for
%% it to report readiness (see await_done/1), runs Fun and finally closes the
%% port.
with_port(Args, Fun) ->
    {ok, PWD} = file:get_cwd(),
    JavaServer = filename:join([PWD, "test/data/eavro-rpc-test-servers-1.7.5-SNAPSHOT-jar-with-dependencies.jar"]),
    fun() ->
            Port = erlang:open_port(
                     {spawn, "java -jar " ++ JavaServer ++ " " ++ Args},
                     [{cd, "test"},
                      {env, []},
                      binary,
                      stream,
                      {parallelism, true},
                      exit_status]),
            try
                %% Crashes with badmatch (carrying the exit status) when the
                %% program terminates before it reports readiness.
                ok = await_done(Port),
                Fun()
            after
                close_port(Port)
            end
    end.

%% Closes Port, tolerating a port that is already closed (for example because
%% the external program has exited), so that cleanup does not replace the
%% exception raised by the test body.
close_port(Port) ->
    try erlang:port_close(Port) of
        true -> ok
    catch
        error:badarg -> ok
    end.

%% Waits until the program behind Port emits a data chunk starting with
%% "*DONE*" and returns ok. Other data chunks are skipped. Because the port is
%% opened with exit_status, the program's termination is also observed here
%% and reported as {error, {exit_status, Status}}; without that clause the
%% receive would keep waiting for a marker that can no longer arrive.
await_done(Port) ->
    receive
        {Port, {data, <<"*DONE*",_/binary>>}} -> ok;
        {Port, {data, _}} ->
            await_done(Port);
        {Port, {exit_status, Status}} ->
            {error, {exit_status, Status}}
    end.

--------------------------------------------------------------------------------
/test/eavro_rpc_test_email_handler.erl:
--------------------------------------------------------------------------------
-module(eavro_rpc_test_email_handler).

-behaviour(eavro_rpc_handler).

-include("eavro.hrl").

%% API
-export([get_protocol/0,
         init/1,
         handle_call/2]).

-record(state, { fwd_pid }).

%% Parses the mail protocol definition used by the RPC tests.
%% The previous version wrapped this call in a catch-all and retried the
%% identical call with the identical path, which only hid the first failure.
get_protocol() ->
    eavro_rpc_proto:parse_protocol_file("test/data/mail.avpr").

%% Keeps the pid (or 'undefined') that received records are forwarded to.
init(FwdPid) ->
    {ok, #state{fwd_pid = FwdPid} }.

%% Handles the 'send' message. The single record argument is forwarded to the
%% configured pid (when there is one) as {?MODULE, Rec}; the reply depends on
%% the message body:
%%   <<"EXIT">>  -> the handler exits with {test_error, "Some reason"}
%%   <<"ERROR">> -> {error, <<"ERROR!">>}
%%   otherwise   -> {ok, <<"OK!">>}
handle_call( {#avro_message{ name = <<"send">> },
              [ [_From, _To, Body] = Rec ]},
             #state{fwd_pid = FwdPid} ) ->
    case FwdPid of
        undefined -> ok;
        _ -> FwdPid ! {?MODULE, Rec}
    end,
    case Body of
        <<"EXIT">> ->
            exit({test_error, "Some reason"});
        <<"ERROR">> ->
            {error, <<"ERROR!">>};
        _ ->
            {ok, <<"OK!">>}
    end.


--------------------------------------------------------------------------------
/test/eavro_schema_tests.erl:
--------------------------------------------------------------------------------
-module(eavro_schema_tests).

-include("eavro.hrl").

-include_lib("eunit/include/eunit.hrl").

%% Reads the twitter schema and checks the record name and the types of its
%% three fields, looked up by binary field name.
parse_twitter_schema_test() ->
    Schema = eavro:read_schema("test/data/twitter.avsc"),
    ?assertMatch(#avro_record{ name = twitter_schema }, Schema),
    Fields = Schema#avro_record.fields,
    ?assertMatch(
       [string, string, long],
       [ proplists:get_value(K, Fields) ||
           K <- [ <<"username">>, <<"tweet">>, <<"timestamp">>] ]).

%% Reads the OCF header schema shipped in priv/ and checks the record name and
%% the types of the magic, meta and sync fields.
parse_avro_ocf_header_test() ->
    Schema = eavro:read_schema("priv/avro_ocf.avsc"),
    ?assertMatch(#avro_record{ name = 'org.apache.avro.file.Header' }, Schema),
    Fields = Schema#avro_record.fields,
    ?assertMatch(
       [#avro_fixed{name='Magic',size=4},
        #avro_map{values=bytes},
        #avro_fixed{name='Sync',size=16}],
       [ proplists:get_value(K, Fields) ||
           K <- [ <<"magic">>, <<"meta">>, <<"sync">>] ]).

%% Reads the transformer schema and checks the record name and its first three
%% fields.
parse_transformer_schema_test() ->
    Schema = eavro:read_schema("test/data/transformer.avsc"),
    ?assertMatch(
       #avro_record{
          name = 'transformer_schema',
          fields = [{<<"id">>, #avro_fixed{} },
                    {<<"fname">>, string },
                    {<<"lname">>, string } | _]},
       Schema).

%% Encoding a parsed schema back to JSON and parsing that JSON again must give
%% the same schema term.
schema_parse_encode_parse_test() ->
    Schema = eavro:read_schema("test/data/transformer.avsc"),
    SchemaJson = eavro:encode_schema(Schema),
    ?assertMatch(Schema, eavro:parse_schema(SchemaJson)).

%% Regression test for issue 11: the schema must parse to a record named
%% 'Click' whose http_headers field is a [map-of-string, null] union.
'ISSUE_11_test'() ->
    #avro_record{
       name = Name,
       fields = Fields} = eavro:read_schema("test/data/issue_11.avsc"),
    ?assertMatch('Click', Name),
    ?assertMatch(
       [{avro_map,string},null],
       proplists:get_value(<<"http_headers">>, Fields)).

--------------------------------------------------------------------------------
/test/eavro_zcodec_tests.erl:
--------------------------------------------------------------------------------
-module(eavro_zcodec_tests).

-include("eavro.hrl").

-include_lib("eunit/include/eunit.hrl").

-export([ to_zlist/1]).

%%===========================================================================================
%% TESTS
%%===========================================================================================

%% Generates one case per sample produced by generate_int32_test_values/0
%% (checked as 'int') and generate_int64_test_values/0 (checked as 'long'):
%% the value is encoded with eavro_codec, fed to eavro_zcodec:decode/2 as a
%% zlist, and must come back with an empty remainder after ztrim/1.
integer_codec_test_() ->
    Generators = [{int, 32, fun generate_int32_test_values/0},
                  {long, 64, fun generate_int64_test_values/0}],
    {inorder,
     [{caption("Encode/Decode ~p as '~s'.", [N, Type]),
       fun() ->
               ?assertMatch(
                  {N, []},
                  ztrim(eavro_zcodec:decode(
                          Type, to_zlist(eavro_codec:encode(Type, N)))))
       end}
      || {Type, Base, Gen} <- Generators,
         %% NOTE(review): this pattern was lost in extraction (it appeared as a
         %% bare `<>`). It is reconstructed from the otherwise unused N and
         %% Base; the signed interpretation is an assumption -- confirm it
         %% against the upstream file.
         <<N:Base/signed-integer>> <- Gen()]}.

%% Checks eavro_zcodec:varint_decode/2 on zlist input against
%% eavro_codec:varint_encode/1. The four fixed cases use values whose top bits
%% are set and additionally pin the encoded size (5 bytes for 32-bit input,
%% 10 bytes for 64-bit input); the generated cases only check that decoding
%% returns the original binary with an empty remainder after ztrim/1.
varint_codec_test_() ->
    {inorder,
     [{"Check varint codec of int32 when no leading zeroes.",
       fun() ->
               FF32 = 16#FFFFFFFF,
               %% The `<<FF32:32>>` binaries were lost in extraction (shown as
               %% `<>`); they are rebuilt from the FF32 binding above.
               EncBytes = <<_:5/binary>> = eavro_codec:varint_encode(<<FF32:32>>),
               {<<FF32:32>>, []} =
                   ztrim(eavro_zcodec:varint_decode(int, to_zlist(EncBytes)))
       end},
      {"Check varint codec of int64 when no leading zeroes.",
       fun() ->
               FF64 = 16#FFFFFFFFFFFFFFFF,
               %% Same reconstruction as above, for the 64-bit value.
               EncBytes = <<_:10/binary>> = eavro_codec:varint_encode(<<FF64:64>>),
               {<<FF64:64>>, []} = ztrim(
                                     eavro_zcodec:varint_decode(
                                       long,
                                       to_zlist(EncBytes)))
       end},
      {"Check varint codec of int32 when no leading zeroes with some irregularity in first byte.",
       fun() ->
               Int32 = <<2#10101110:8, 16#FFFFFF:24>>,
               EncBytes = <<_:5/binary>> = eavro_codec:varint_encode(Int32),
               {Int32, []} = ztrim(eavro_zcodec:varint_decode(int, to_zlist(EncBytes)))
       end},
      {"Check varint codec of int64 when no leading zeroes with some irregularity in first byte.",
       fun() ->
               Int64 = <<2#10101110:8, 16#FFFFFFFFFFFFFFFF:56>>,
               EncBytes = <<_:10/binary>> = eavro_codec:varint_encode(Int64),
               {Int64, []} = ztrim(eavro_zcodec:varint_decode(long, to_zlist(EncBytes)))
       end}] ++
         [{caption("Check varint codec of int32: ~p.", [Int32]),
           fun() ->
                   EncBytes = eavro_codec:varint_encode(Int32),
                   {Int32, []} = ztrim(eavro_zcodec:varint_decode(int, to_zlist(EncBytes)))
           end} || Int32 <- generate_int32_test_values()] ++
         [{caption("Check varint codec of int64: ~p.", [Int64]),
           fun() ->
                   EncBytes = eavro_codec:varint_encode(Int64),
                   {Int64, []} = ztrim(eavro_zcodec:varint_decode(long, to_zlist(EncBytes)))
           end} || Int64 <- generate_int64_test_values()]}.

%%
%% Test app domain record.
%%
-record('Person', { id, fname, lname, age, is_autobot,
                    is_desepticon, energy_level, home, clan}).
-record('GalaxyCoord', {x,y,z}).

%% Round-trip test for a record schema that nests another record and an enum,
%% decoded through the zlist-based codec. The actual check lives in
%% check_person_round_trip/0.
avro_record_codec_test_() ->
    Title = "Encode/Decode avro record with all types of fields and nested avro record.",
    {inorder, [{Title, fun check_person_round_trip/0}]}.

%% Encodes one 'Person' instance with eavro_codec, decodes it from a zlist
%% with a hook that builds the #'Person'{} / #'GalaxyCoord'{} records of this
%% module (and turns strings into character lists), then matches the fields
%% the test cares about on the trimmed result.
check_person_round_trip() ->
    CoordType = #avro_record{name = 'GalaxyCoord',
                             fields = [{x, double}, {y, double}, {z, double}]},
    ClanType = #avro_enum{name = 'Clan',
                          symbols = ['Autobots', 'Desepticons']},
    PersonType = #avro_record{
                    name = 'Person',
                    fields = [{id, long},
                              {energy_level, float},
                              {home, CoordType},
                              {fname, string},
                              {lname, string},
                              {age, int},
                              {is_autobot, boolean},
                              {is_desepticon, boolean},
                              {clan, ClanType}]},
    ToDomain =
        fun(#avro_record{name = 'Person'},
            [Id, Energy, Home, FName, LName, Age, IsAutobot, IsDesepticon, Clan]) ->
                #'Person'{id = Id, energy_level = Energy, home = Home,
                          fname = FName, lname = LName, age = Age,
                          is_autobot = IsAutobot, is_desepticon = IsDesepticon,
                          clan = Clan};
           (#avro_record{name = 'GalaxyCoord'}, [Cx, Cy, Cz]) ->
                #'GalaxyCoord'{x = Cx, y = Cy, z = Cz};
           (string, Bin) ->
                binary_to_list(Bin);
           (_OtherType, Value) ->
                Value
        end,
    Coord = [X, Y, Z] = [2342.34523,
                         675322341.422422324,
                         242252.56473457345],
    Instance = [16#AABBCCDDEEFF0011, 23423.5674,
                Coord,
                <<"Optimus">>, <<"Prime">>, 1000, true, false,
                'Autobots'],
    Encoded = eavro_codec:encode(PersonType, Instance),
    Decoded = eavro_zcodec:decode(PersonType, to_zlist(Encoded), ToDomain),
    ?assertMatch({#'Person'{fname = "Optimus",
                            lname = "Prime",
                            age = 1000,
                            home = #'GalaxyCoord'{x = X, y = Y, z = Z},
                            clan = 'Autobots',
                            is_autobot = true,
                            is_desepticon = false}, []},
                 ztrim(Decoded)).

%% A map of longs must survive encoding with eavro_codec and decoding from a
%% zlist. The decoder returns the entries grouped in blocks, so they are
%% flattened and sorted before comparison.
avro_map_codec_test() ->
    MapType = #avro_map{values = long},
    Expected = lists:sort([{<<"k1">>, 1}, {<<"k2">>, 2}, {<<"k3">>, 3}]),
    ZBytes = to_zlist(eavro_codec:encode(MapType, Expected)),
    {Blocks, []} = ztrim(eavro_zcodec:decode(MapType, ZBytes)),
    ?assertMatch(Expected, lists:sort(lists:flatten(Blocks))).

%% A 5-byte fixed value must come back unchanged with an empty remainder.
avro_fixed_codec_test() ->
    FixedType = #avro_fixed{size = 5},
    Value = <<0,1,2,3,4>>,
    ZBytes = to_zlist(eavro_codec:encode(FixedType, Value)),
    Result = ztrim(eavro_zcodec:decode(FixedType, ZBytes)),
    ?assertMatch({Value, []}, Result).

%% An array of strings must decode to a single block holding the same items.
avro_array_codec_test() ->
    ArrayType = #avro_array{items = string},
    Items = [<<"Alpha">>, <<"Beta">>, <<"Gamma">>,
             <<"Delta">>, <<"Epsilon">>, <<"Dzeta">>],
    ZBytes = to_zlist(eavro_codec:encode(ArrayType, Items)),
    Result = ztrim(eavro_zcodec:decode(ArrayType, ZBytes)),
    ?assertMatch({[Items], []}, Result).

%% Each branch of an [int, string] union must round-trip as a tagged value.
avro_union_codec_test() ->
    Union = [int,string],
    lists:foreach(
      fun(Tagged) ->
              ZBytes = to_zlist(eavro_codec:encode(Union, Tagged)),
              ?assertMatch({Tagged, []},
                           ztrim(eavro_zcodec:decode(Union, ZBytes)))
      end,
      [{int, 137}, {string, <<"NaN">>}]).

%% An array whose item type is a union (including a record branch) must decode
%% to one block with the same tagged items.
avro_array_of_union_codec_test() ->
    RecType = #avro_record{name = some_struct,
                           fields = [{field1, long}]},
    ArrayType = #avro_array{items = [int, string, RecType]},
    Items = [{int, 1},
             {int, 2},
             {string, <<"very much">>},
             {RecType, [137]}],
    ZBytes = to_zlist(eavro_codec:encode(ArrayType, Items)),
    ?assertMatch({[Items], []},
                 ztrim(eavro_zcodec:decode(ArrayType, ZBytes))).

%%==================================================================================
%% HELPER FUNCTIONS
%%==================================================================================

%% Formats a test title and returns it as a flat character list.
caption(Fmt, Args) ->
    lists:flatten(io_lib:format(Fmt, Args)).

%% 32-bit sample binaries: zero, then the byte 2#11001000 placed in each of
%% the four byte positions in turn.
generate_int32_test_values() ->
    [<<0:32>>]++[ <<(2#11001000 bsl (N*8)):32>>|| N <- lists:seq(0,3)].

%% 64-bit sample binaries: zero, then the byte 2#11001000 placed in each of
%% the eight byte positions in turn.
generate_int64_test_values() ->
    [<<0:64>>] ++ [ <<(2#11001000 bsl (N*8)):64>>|| N <- lists:seq(0,7)].

%% Turns an iolist or binary into a lazy list of binaries whose tail is a fun
%% producing the rest.
%% We could simply wrap the binary with [] but want a more complicated zlist
%% form, so the decoder is exercised on input that arrives in pieces.
to_zlist(L) when is_list(L) ->
    to_zlist(iolist_to_binary(L));
to_zlist(<<>>) ->
    [];
%% NOTE(review): this clause head was lost in extraction (it appeared as a
%% bare `<>`). One-byte chunks are an assumption that fits the B/Tail usage
%% below and the binary elements ztrim/1 expects -- confirm against upstream.
to_zlist(<<B:1/binary, Tail/binary>>) ->
    [B | fun() -> to_zlist(Tail) end].

%% Normalizes the remainder of a decode result: in a {Value, Rest} pair the
%% list Rest is trimmed; leading empty binaries are dropped from a zlist,
%% forcing one element at a time with zlists:expand/2; anything else is
%% returned unchanged.
ztrim({Val, L}) when is_list(L)->
    {Val, ztrim(L)};
ztrim([<<>>|Tail])->
    ztrim(zlists:expand(1,Tail));
ztrim(L) ->
    L.

--------------------------------------------------------------------------------