├── test ├── .gitignore ├── data │ ├── .gitignore │ ├── interop.ocf │ ├── interop.ocf.test │ ├── interop_deflate.ocf │ ├── interop_snappy.ocf │ ├── interop_no_codec.ocf │ ├── interop_snappy_invalid_checksum.ocf │ ├── README.md │ ├── test.avsc │ ├── interop.avsc │ └── gen_interop_data.py ├── avro_binary_decoder_tests.erl ├── avro_binary_encoder_tests.erl ├── avro_json_encoder_tests.erl ├── avro_enum_tests.erl ├── avro_map_tests.erl ├── avro_array_tests.erl ├── avro_fixed_tests.erl ├── avro_fingerprint_tests.erl ├── avro_decoder_hooks_tests.erl ├── avro_union_tests.erl ├── avro_ocf_tests.erl ├── avro_record_tests.erl ├── avro_json_encoder_canon_tests.erl └── avro_schema_store_tests.erl ├── .gitignore ├── src ├── erlavro.app.src ├── avro_fixed.erl ├── avro_map.erl ├── avro_array.erl ├── avro_primitive.erl ├── avro_enum.erl ├── avro_json_compat.erl ├── avro_binary_encoder.erl ├── avro_decoder_hooks.erl ├── avro_binary_decoder.erl ├── avro_union.erl ├── avro_schema_store.erl └── avro_ocf.erl ├── .github └── workflows │ └── erlang.yml ├── rebar.config ├── Makefile ├── .travis.yml ├── elvis.config ├── changelog.md ├── scripts └── cover-print-not-covered-lines.escript ├── include ├── erlavro.hrl └── avro_internal.hrl ├── README.md └── LICENSE /test/.gitignore: -------------------------------------------------------------------------------- 1 | *.beam 2 | -------------------------------------------------------------------------------- /test/data/.gitignore: -------------------------------------------------------------------------------- 1 | *.test 2 | -------------------------------------------------------------------------------- /test/data/interop.ocf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klarna/erlavro/HEAD/test/data/interop.ocf -------------------------------------------------------------------------------- /test/data/interop.ocf.test: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/klarna/erlavro/HEAD/test/data/interop.ocf.test -------------------------------------------------------------------------------- /test/data/interop_deflate.ocf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klarna/erlavro/HEAD/test/data/interop_deflate.ocf -------------------------------------------------------------------------------- /test/data/interop_snappy.ocf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klarna/erlavro/HEAD/test/data/interop_snappy.ocf -------------------------------------------------------------------------------- /test/data/interop_no_codec.ocf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klarna/erlavro/HEAD/test/data/interop_no_codec.ocf -------------------------------------------------------------------------------- /test/avro_binary_decoder_tests.erl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klarna/erlavro/HEAD/test/avro_binary_decoder_tests.erl -------------------------------------------------------------------------------- /test/avro_binary_encoder_tests.erl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klarna/erlavro/HEAD/test/avro_binary_encoder_tests.erl -------------------------------------------------------------------------------- /test/avro_json_encoder_tests.erl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klarna/erlavro/HEAD/test/avro_json_encoder_tests.erl -------------------------------------------------------------------------------- /test/data/interop_snappy_invalid_checksum.ocf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/klarna/erlavro/HEAD/test/data/interop_snappy_invalid_checksum.ocf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ebin/*.app 2 | ebin/*.beam 3 | .eunit 4 | /lib/ 5 | deps/ 6 | /.rebar/ 7 | doc/ 8 | .idea/ 9 | ebin/ 10 | erlavro.iml 11 | out/ 12 | _build 13 | rebar.lock 14 | *.crashdump 15 | -------------------------------------------------------------------------------- /test/data/README.md: -------------------------------------------------------------------------------- 1 | Files in `test/data` are used in tests 2 | 3 | Files downloaded from https://cwiki.apache.org/confluence/display/AVRO/Interoperability+Testing 4 | 5 | - interop.avsc 6 | - gen_interop_data.py 7 | 8 | Generated files 9 | 10 | - interop.ocf: ocf file generated from: `./gen_interop_data.py interop.avsc interop.ocf` 11 | - interop_deflate.ocf: ocf file generated from: `./gen_interop_data.py --codec=deflate interop.avsc interop_deflate.ocf` 12 | - interop_snappy.ocf: ocf file generated from: `./gen_interop_data.py --codec=snappy interop.avsc interop_snappy.ocf` 13 | -------------------------------------------------------------------------------- /src/erlavro.app.src: -------------------------------------------------------------------------------- 1 | {application, erlavro, 2 | [ 3 | {description, "Apache Avro support for Erlang/Elixir"}, 4 | {vsn, "git"}, 5 | {registered, []}, 6 | {applications, [ 7 | kernel, 8 | stdlib, 9 | snappyer 10 | ]}, 11 | {env, []}, 12 | {modules,[]}, 13 | {licenses, ["Apache License 2.0"]}, 14 | {links, [{"Github", "https://github.com/klarna/erlavro"}]}, 15 | {build_tools, ["make", "rebar3"]}, 16 | {files, ["src", "include", "rebar.config", "rebar.config.script", 17 | "README.md", "LICENSE", "Makefile", "elvis.config"]} 18 | ]}. 
19 | -------------------------------------------------------------------------------- /.github/workflows/erlang.yml: -------------------------------------------------------------------------------- 1 | name: Erlang CI 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | 14 | build: 15 | 16 | runs-on: ubuntu-latest 17 | 18 | strategy: 19 | matrix: 20 | otp-version: [26, 27, 28] 21 | 22 | container: 23 | image: erlang:${{ matrix.otp-version }} 24 | 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Run tests 28 | run: make eunit && make cover 29 | - name: Run Dialyzer 30 | run: make dialyzer 31 | -------------------------------------------------------------------------------- /test/data/test.avsc: -------------------------------------------------------------------------------- 1 | {"type": "record", 2 | "name":"test", 3 | "namespace": "org.apache.avro", 4 | "fields": 5 | [{"name": "f1", 6 | "type": {"type": "record", 7 | "name": "Node", 8 | "fields": 9 | [{"name": "label", "type": "string"}, 10 | {"name": "children", 11 | "type": {"type": "array", "items": "Node"}, 12 | "default": [{"label": "default-label", "children": []}]} 13 | ]}}, 14 | {"name": "f2", "type": ["null", "string"], "default": null}, 15 | {"name": "f3", "type": ["null", "string"], "default": "null"}, 16 | {"name": "f4", "type": ["string", "null"]} 17 | ]} 18 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | %% -*- mode:erlang -*- 2 | {erl_opts, [ debug_info 3 | , warnings_as_errors 4 | , {d,'NOTEST'} 5 | ]}. 6 | {eunit_opts, [verbose]}. 7 | {xref_checks, [ undefined_function_calls 8 | , deprecated_function_calls 9 | ]}. 10 | {edoc_opts, [{preprocess, true}]}. 11 | {deps, 12 | [ 13 | {snappyer, "1.2.9"} 14 | ]}. 
15 | 16 | {profiles, 17 | [{test, 18 | [{deps, [{jsone, "1.8.1"}]}] 19 | }]}. 20 | 21 | {cover_opts, [verbose]}. 22 | {cover_enabled, true}. 23 | {cover_export_enabled, true}. 24 | 25 | {dialyzer, [{warnings, [unknown]}]}. 26 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | REBAR ?= rebar3 2 | 3 | all: compile 4 | 5 | .PHONY: elvis-rock 6 | elvis-rock: 7 | elvis rock 8 | 9 | .PHONY: compile 10 | compile: deps 11 | $(REBAR) compile 12 | 13 | .PHONY: deps 14 | deps: 15 | $(REBAR) get-deps 16 | 17 | .PHONY: edoc 18 | edoc: 19 | $(REBAR) edoc 20 | 21 | .PHONY: dialyzer 22 | dialyzer: compile 23 | $(REBAR) dialyzer 24 | 25 | .PHONY: eunit 26 | eunit: 27 | $(REBAR) eunit -v 28 | 29 | .PHONY: clean 30 | clean: 31 | $(REBAR) clean 32 | 33 | .PHONY: xref 34 | xref: compile 35 | $(REBAR) xref 36 | 37 | .PHONY: hex-publish 38 | hex-publish: clean 39 | $(REBAR) hex publish 40 | 41 | .PHONY: cover 42 | cover: 43 | $(REBAR) cover -v 44 | 45 | .PHONY: coveralls 46 | coveralls: 47 | $(REBAR) coveralls send 48 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: erlang 2 | 3 | sudo: required 4 | 5 | before_install: 6 | - git clone https://github.com/erlang/rebar3.git; cd rebar3; ./bootstrap; sudo cp rebar3 /usr/bin; cd .. 7 | - git clone https://github.com/inaka/elvis.git; cd elvis; rebar3 escriptize; sudo cp _build/default/bin/elvis /usr/bin; cd .. 
8 | 9 | notifications: 10 | email: false 11 | 12 | otp_release: 13 | - 21.0 14 | - 20.3 15 | - 19.3 16 | 17 | script: 18 | - make elvis-rock 19 | - make edoc 20 | - make xref 21 | - make eunit 22 | - | 23 | OTP_RELEASE=`erl -noshell -eval 'io:format(erlang:system_info(otp_release)), halt(0)'` 24 | if [ $OTP_RELEASE -eq 21 ]; then 25 | make dialyzer 26 | make cover 27 | make coveralls 28 | else 29 | make cover 30 | fi 31 | 32 | -------------------------------------------------------------------------------- /test/data/interop.avsc: -------------------------------------------------------------------------------- 1 | {"type": "record", "name":"Interop", "namespace": "org.apache.avro", 2 | "fields": [ 3 | {"name": "intField", "type": "int"}, 4 | {"name": "longField", "type": "long"}, 5 | {"name": "stringField", "type": "string"}, 6 | {"name": "boolField", "type": "boolean"}, 7 | {"name": "floatField", "type": "float"}, 8 | {"name": "doubleField", "type": "double"}, 9 | {"name": "bytesField", "type": "bytes", "default": "\u0000"}, 10 | {"name": "nullField", "type": "null"}, 11 | {"name": "arrayField", "type": {"type": "array", "items": "double"}}, 12 | {"name": "mapField", "type": 13 | {"type": "map", "values": 14 | {"type": "record", "name": "Foo", 15 | "fields": [{"name": "label", "type": "string"}]}}}, 16 | {"name": "unionField", "type": 17 | ["boolean", "double", {"type": "array", "items": "bytes"}]}, 18 | {"name": "enumField", "type": 19 | {"type": "enum", "name": "Kind", "symbols": ["A","B","C"]}}, 20 | {"name": "fixedField", "type": 21 | {"type": "fixed", "name": "MD5", "size": 16}}, 22 | {"name": "recordField", "type": 23 | {"type": "record", "name": "Node", 24 | "fields": [ 25 | {"name": "label", "type": "string"}, 26 | {"name": "children", "type": {"type": "array", "items": "Node"}}]}} 27 | ] 28 | } 29 | -------------------------------------------------------------------------------- /test/data/gen_interop_data.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | import getopt 19 | import sys 20 | from avro import schema 21 | from avro import io 22 | from avro import datafile 23 | 24 | DATUM = { 25 | 'intField': 12, 26 | 'longField': 15234324L, 27 | 'stringField': unicode('hey'), 28 | 'boolField': True, 29 | 'floatField': 1234.0, 30 | 'doubleField': -1234.0, 31 | 'bytesField': '12312adf', 32 | 'nullField': None, 33 | 'arrayField': [5.0, 0.0, 12.0], 34 | 'mapField': {'a': {'label': 'a'}, 'bee': {'label': 'cee'}}, 35 | 'unionField': 12.0, 36 | 'enumField': 'C', 37 | 'fixedField': '1019181716151413', 38 | 'recordField': {'label': 'blah', 'children': [{'label': 'inner', 'children': []}]}, 39 | } 40 | 41 | def write(interop_schema, writer, codec): 42 | datum_writer = io.DatumWriter() 43 | dfw = datafile.DataFileWriter(writer, datum_writer, interop_schema, codec=codec) 44 | dfw.append(DATUM) 45 | dfw.close() 46 | 47 | if __name__ == "__main__": 48 | codec = 'null' 49 | opts, args = getopt.getopt(sys.argv[1:], 'c:', ['codec=']) 50 | for o, a in opts: 51 | if o == '--codec' or o == '-c': 
52 | codec = a 53 | 54 | interop_schema = schema.parse(open(args[0], 'r').read()) 55 | writer = open(args[1], 'wb') 56 | write(interop_schema, writer, codec) 57 | -------------------------------------------------------------------------------- /test/avro_enum_tests.erl: -------------------------------------------------------------------------------- 1 | %% coding: latin-1 2 | %%%------------------------------------------------------------------- 3 | %%% Copyright (c) 2013-2016 Klarna AB 4 | %%% 5 | %%% This file is provided to you under the Apache License, 6 | %%% Version 2.0 (the "License"); you may not use this file 7 | %%% except in compliance with the License. You may obtain 8 | %%% a copy of the License at 9 | %%% 10 | %%% http://www.apache.org/licenses/LICENSE-2.0 11 | %%% 12 | %%% Unless required by applicable law or agreed to in writing, 13 | %%% software distributed under the License is distributed on an 14 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %%% KIND, either express or implied. See the License for the 16 | %%% specific language governing permissions and limitations 17 | %%% under the License. 18 | %%% 19 | %%%------------------------------------------------------------------- 20 | -module(avro_enum_tests). 21 | 22 | -import(avro_enum, [ cast/2 23 | , get_value/1 24 | , new/2 25 | , type/2 26 | ]). 27 | 28 | -include("erlavro.hrl"). 29 | -include_lib("eunit/include/eunit.hrl"). 30 | 31 | empty_symbols_test() -> 32 | ?assertError(empty_symbols, type("FooBar", [])). 33 | 34 | non_unique_symbols_test() -> 35 | ?assertError(non_unique_symbols, type("FooBar", ["a", "c", "d", "c", "e"])). 36 | 37 | invalid_name_test() -> 38 | ?assertError({invalid_name, <<"c-1">>}, 39 | type("FooBar", ["a", "b", "c-1", "d", "c", "e"])). 
40 | 41 | cast_from_string_test() -> 42 | Type = type("MyEnum", ["a", "b", "c", "d"]), 43 | {ok, Enum} = avro:cast(Type, "b"), 44 | ?assertEqual(Type, ?AVRO_VALUE_TYPE(Enum)), 45 | ?assertEqual(<<"b">>, get_value(Enum)), 46 | ?assertEqual(<<"b">>, avro:to_term(Enum)). 47 | 48 | bad_cast_from_string_test() -> 49 | Type = type("MyEnum", ["a", "b", "c", "d"]), 50 | ?assertEqual({error, {cast_error, Type, "e"}}, cast(Type, "e")). 51 | 52 | get_value_test() -> 53 | Type = type("MyEnum", ["a", "b", "c", "d"]), 54 | Value = new(Type, "b"), 55 | ?assertEqual(<<"b">>, get_value(Value)). 56 | 57 | new_error_test() -> 58 | Type = type("MyEnum", ["a", "b", "c", "d"]), 59 | ?assertException(error, {cast_error, Type, "x"}, new(Type, "x")). 60 | 61 | %%%_* Emacs ==================================================================== 62 | %%% Local Variables: 63 | %%% allout-layout: t 64 | %%% erlang-indent-level: 2 65 | %%% End: 66 | -------------------------------------------------------------------------------- /test/avro_map_tests.erl: -------------------------------------------------------------------------------- 1 | %% coding: latin-1 2 | %%%------------------------------------------------------------------- 3 | %%% Copyright (c) 2013-2016 Klarna AB 4 | %%% 5 | %%% This file is provided to you under the Apache License, 6 | %%% Version 2.0 (the "License"); you may not use this file 7 | %%% except in compliance with the License. You may obtain 8 | %%% a copy of the License at 9 | %%% 10 | %%% http://www.apache.org/licenses/LICENSE-2.0 11 | %%% 12 | %%% Unless required by applicable law or agreed to in writing, 13 | %%% software distributed under the License is distributed on an 14 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %%% KIND, either express or implied. See the License for the 16 | %%% specific language governing permissions and limitations 17 | %%% under the License. 
18 | %%% 19 | %%%------------------------------------------------------------------- 20 | -module(avro_map_tests). 21 | 22 | -include("erlavro.hrl"). 23 | -include_lib("eunit/include/eunit.hrl"). 24 | 25 | cast_test() -> 26 | Type = avro_map:type(int), 27 | {ok, Value} = avro_map:cast(Type, [{v1, 1}, {"v2", 2}, {"v3", 3}]), 28 | List = avro_map:to_list(Value), 29 | Expected = [ {<<"v1">>, avro_primitive:int(1)} 30 | , {<<"v2">>, avro_primitive:int(2)} 31 | , {<<"v3">>, avro_primitive:int(3)} 32 | ], 33 | ?assertEqual(Expected, List), 34 | 35 | {ok, MapValue} = avro_map:cast(Type, #{v1 => 1, "v2" => 2, "v3" => 3}), 36 | MapList = avro_map:to_list(MapValue), 37 | ?assertEqual(Expected, MapList). 38 | 39 | new_test() -> 40 | Type = avro_map:type(avro_primitive:long_type()), 41 | Value = avro_map:new(Type, [{"a", 1}, {"b", 2}]), 42 | ?assertMatch(#avro_value{}, Value), 43 | ?assertException(error, {type_mismatch, _, _}, 44 | avro_map:new(Type, [{"a", "x"}])), 45 | 46 | MapValue = avro_map:new(Type, #{"a" => 1, "b" => 2}), 47 | ?assertMatch(#avro_value{}, MapValue), 48 | ?assertException(error, {type_mismatch, _, _}, 49 | avro_map:new(Type, #{"a" => "x"})). 50 | 51 | 52 | to_term_test() -> 53 | Type = avro_map:type(avro_primitive:int_type()), 54 | ExpectedMappings = [{<<"v1">>, 1}, {<<"v2">>, 2}, {<<"v3">>, 3}], 55 | Value = avro_map:new(Type, ExpectedMappings), 56 | Mappings = avro:to_term(Value), 57 | ?assertEqual(ExpectedMappings, lists:keysort(1, Mappings)). 
58 | 59 | %%%_* Emacs ==================================================================== 60 | %%% Local Variables: 61 | %%% allout-layout: t 62 | %%% erlang-indent-level: 2 63 | %%% End: 64 | -------------------------------------------------------------------------------- /test/avro_array_tests.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% Copyright (c) 2013-2016 Klarna AB 3 | %%% 4 | %%% This file is provided to you under the Apache License, 5 | %%% Version 2.0 (the "License"); you may not use this file 6 | %%% except in compliance with the License. You may obtain 7 | %%% a copy of the License at 8 | %%% 9 | %%% http://www.apache.org/licenses/LICENSE-2.0 10 | %%% 11 | %%% Unless required by applicable law or agreed to in writing, 12 | %%% software distributed under the License is distributed on an 13 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | %%% KIND, either express or implied. See the License for the 15 | %%% specific language governing permissions and limitations 16 | %%% under the License. 17 | %%% 18 | %%%------------------------------------------------------------------- 19 | -module(avro_array_tests). 20 | 21 | -include("erlavro.hrl"). 22 | -include_lib("eunit/include/eunit.hrl"). 23 | 24 | to_term_test() -> 25 | ArrayType = avro_array:type(int), 26 | {ok, Array} = avro:cast(ArrayType, [1, 2]), 27 | ?assertEqual([1, 2], avro:to_term(Array)). 28 | 29 | cast_test() -> 30 | ArrayType = avro_array:type(string), 31 | {ok, Array} = avro_array:cast(ArrayType, ["a", "b"]), 32 | ?assertEqual(ArrayType, ?AVRO_VALUE_TYPE(Array)), 33 | ?assertEqual([ avro_primitive:string("a") 34 | , avro_primitive:string("b") 35 | ], ?AVRO_VALUE_DATA(Array)). 
36 | 37 | prepend_test() -> 38 | ArrayType = avro_array:type(string), 39 | {ok, Array} = avro_array:cast(ArrayType, ["b", "a"]), 40 | NewArray = avro_array:prepend(["d", "c"], Array), 41 | ExpectedValues = [avro_primitive:string(S) || S <- ["d", "c", "b", "a"]], 42 | ?assertEqual(ExpectedValues, ?AVRO_VALUE_DATA(NewArray)). 43 | 44 | new_direct_test() -> 45 | Type = avro_array:type(int), 46 | NewVersion = avro_array:new(Type, [1,2,3]), 47 | DirectVersion = avro_array:new_direct(Type, 48 | [ avro_primitive:int(1) 49 | , avro_primitive:int(2) 50 | , avro_primitive:int(3)]), 51 | ?assertEqual(NewVersion, DirectVersion). 52 | 53 | new_test() -> 54 | Type = avro_array:type(int), 55 | ?assertEqual(?AVRO_VALUE(Type, []), avro_array:new(Type)), 56 | ?assertMatch(?AVRO_VALUE(Type, [_]), avro_array:new(Type, [1])), 57 | ?assertException(error, {type_mismatch, _, _}, avro_array:new(Type, ["a"])). 58 | 59 | %%%_* Emacs ==================================================================== 60 | %%% Local Variables: 61 | %%% allout-layout: t 62 | %%% erlang-indent-level: 2 63 | %%% End: 64 | -------------------------------------------------------------------------------- /elvis.config: -------------------------------------------------------------------------------- 1 | %% -*- erlang -*- 2 | [ 3 | { 4 | elvis, 5 | [ 6 | {config, 7 | [ 8 | #{dirs => ["src"], 9 | filter => "*.erl", 10 | rules => [ {elvis_text_style, line_length, #{ limit => 80 }} 11 | , {elvis_text_style, no_trailing_whitespace} 12 | , {elvis_style, operator_spaces, 13 | #{ rules => [ {right,","} 14 | , {right,"+"} 15 | , {left,"+"} 16 | , {right,"*"} 17 | , {left,"*"} 18 | , {right,"--"} 19 | , {left,"--"} 20 | , {right,"++"} 21 | , {left,"++"} 22 | ] 23 | }} 24 | , {elvis_style, nesting_level, 25 | #{ level => 3, 26 | ignore => [] 27 | }} 28 | , {elvis_style, god_modules, 29 | #{ limit => 25, 30 | ignore => [avro] 31 | }} 32 | , {elvis_style, no_nested_try_catch} 33 | , {elvis_style, invalid_dynamic_call, 34 | 
#{ignore => [] 35 | }} 36 | , {elvis_style, used_ignored_variable} 37 | , {elvis_style, no_behavior_info} 38 | , {elvis_style, module_naming_convention, 39 | #{ ignore => [], 40 | regex => "^([a-z][a-z0-9]*_?)([a-z0-9]*_?)*$" 41 | }} 42 | , {elvis_style, function_naming_convention, 43 | #{ regex => "^([a-z][a-z0-9]*_?)([a-z0-9]*_?)*$" 44 | }} 45 | , {elvis_style, variable_naming_convention, 46 | #{ regex => "^_?([A-Z][0-9a-zA-Z_]*)$" 47 | }} 48 | , {elvis_style, state_record_and_type} 49 | , {elvis_style, no_spec_with_records} 50 | , {elvis_style, dont_repeat_yourself, 51 | #{ min_complexity => 15 52 | }} 53 | , {elvis_style, no_debug_call, 54 | #{ignore => [avro_decoder_hooks]}} 55 | 56 | ] 57 | }, 58 | #{dirs => ["test"], 59 | filter => "*.erl", 60 | rules => [ {elvis_text_style, line_length, #{ limit => 80 }} 61 | , {elvis_text_style, no_trailing_whitespace} 62 | ] 63 | } 64 | ] 65 | } 66 | ] 67 | } 68 | ]. 69 | 70 | %%%_* Emacs ==================================================================== 71 | %%% Local Variables: 72 | %%% allout-layout: t 73 | %%% erlang-indent-level: 2 74 | %%% End: 75 | -------------------------------------------------------------------------------- /test/avro_fixed_tests.erl: -------------------------------------------------------------------------------- 1 | %% coding: latin-1 2 | %%%------------------------------------------------------------------- 3 | %%% Copyright (c) 2013-2016 Klarna AB 4 | %%% 5 | %%% This file is provided to you under the Apache License, 6 | %%% Version 2.0 (the "License"); you may not use this file 7 | %%% except in compliance with the License. You may obtain 8 | %%% a copy of the License at 9 | %%% 10 | %%% http://www.apache.org/licenses/LICENSE-2.0 11 | %%% 12 | %%% Unless required by applicable law or agreed to in writing, 13 | %%% software distributed under the License is distributed on an 14 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %%% KIND, either express or implied. 
See the License for the 16 | %%% specific language governing permissions and limitations 17 | %%% under the License. 18 | %%% 19 | %%%------------------------------------------------------------------- 20 | -module(avro_fixed_tests). 21 | 22 | -include("erlavro.hrl"). 23 | -include_lib("eunit/include/eunit.hrl"). 24 | 25 | neg_size_test() -> 26 | ?assertError({invalid_size, -1}, avro_fixed:type("FooBar", -1)). 27 | 28 | short_create_test() -> 29 | Type = avro_fixed:type("FooBar", 16), 30 | ?assertEqual(<<"FooBar">>, avro:get_type_fullname(Type)), 31 | ?assertEqual(16, avro_fixed:get_size(Type)). 32 | 33 | full_create_test() -> 34 | Type = avro_fixed:type("FooBar", 16, 35 | [ {namespace, "name.space"} 36 | , {aliases, ["Zoo", "Bee"]} 37 | ]), 38 | ?assertEqual(<<"name.space.FooBar">>, avro:get_type_fullname(Type)), 39 | ?assertEqual(16, avro_fixed:get_size(Type)). 40 | 41 | bad_cast_from_binary_test() -> 42 | Type = avro_fixed:type("FooBar", 2), 43 | ?assertEqual({error, bad_size}, avro_fixed:cast(Type, <<1,2,3>>)), 44 | ?assertEqual({error, bad_size}, avro_fixed:cast(Type, <<1>>)). 45 | 46 | cast_from_binary_test() -> 47 | Type = avro_fixed:type("FooBar", 2), 48 | Bin = <<1,2>>, 49 | ?assertEqual({ok, ?AVRO_VALUE(Type, Bin)}, avro_fixed:cast(Type, Bin)). 50 | 51 | integer_cast_test() -> 52 | Type = avro_fixed:type("FooBar", 2), 53 | Value1 = avro_fixed:new(Type, 67), %% 1 byte 54 | Value2 = avro_fixed:new(Type, 1017), %% 2 bytes 55 | Data1 = avro_fixed:get_value(Value1), 56 | Data2 = avro_fixed:get_value(Value2), 57 | ?assertEqual(67, binary:decode_unsigned(Data1)), 58 | ?assertEqual(1017, binary:decode_unsigned(Data2)). 59 | 60 | get_value_test() -> 61 | Type = avro_fixed:type("FooBar", 2), 62 | Value = avro_fixed:new(Type, <<1,2>>), 63 | ?assertEqual(<<1,2>>, avro_fixed:get_value(Value)), 64 | ?assertEqual(<<1,2>>, avro:to_term(Value)). 
65 | 66 | new_error_test() -> 67 | Type = avro_fixed:type("FooBar", 2), 68 | ?assertException(error, bad_size, 69 | avro_fixed:new(Type, <<"abc">>)). 70 | 71 | integer_out_of_range_test() -> 72 | Type = avro_fixed:type("FooBar", 2), 73 | ?assertException(error, integer_out_of_range, 74 | avro_fixed:new(Type, 65536)). 75 | 76 | cast_error_test() -> 77 | Type = avro_fixed:type("FooBar", 2), 78 | ?assertEqual({error, {cast_error, Type, "a"}}, 79 | avro_fixed:cast(Type, "a")). 80 | 81 | %%%_* Emacs ==================================================================== 82 | %%% Local Variables: 83 | %%% allout-layout: t 84 | %%% erlang-indent-level: 2 85 | %%% End: 86 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | * 2.11.0 2 | - Deleted jsone as dependency. 3 | When OTP release is 27 or later, the default JSON provider module is `json`, otherwise `jsone`. 4 | For OTP release 26 or earlier version, you must add `jsone-1.8.1` or newer in your project dependency. 5 | For OTP release 27 or later, you can choose to continue using jsone by calling `avro:set_json_provider(jsone)`. 6 | 7 | * 2.10.3 8 | - Allow union type to have zero member types. 9 | * 2.10.2 10 | - Fix bytes and fixed JSON value decode 11 | * 2.10.1 12 | - Fix dialyzer error. 13 | * 2.10.0 14 | - Add map as avro store, and use it as default. 15 | - Changed to store type aliases as type's full name index, so the type store map (or dict) is less bloated. 
16 | * 2.9.10 17 | - Optimize avro:is_same_type/2 18 | - Upgrade jsone to 1.8.1 19 | * 2.9.9 20 | - Enable builds with gcc-13 21 | - Improve `@aliases` support 22 | * 2.9.8 23 | - Removed support for Rebar 2 24 | * 2.9.7 25 | - Export OCF make_header and make_block functions 26 | * 2.9.6 27 | - Add snappy compression for OCF 28 | * 2.9.5 29 | - Removed workarounds for OTP < 21 30 | - Add map() to the avro:out() union type 31 | * 2.9.4 32 | - Dropped `?MODULE` from required error in erlavro.hrl to allow elixir to extract record details 33 | * 2.9.3 34 | - Add OCF decoder hook 35 | - Dialyzer fixes 36 | * 2.9.2 37 | - Move test files from priv to test/data 38 | * 2.9.1 39 | - Add `true` and `false` to the atom exceptions when converting to strings 40 | * 2.9.0 41 | - Encode atom values for string types when not `nil` or `null` 42 | * 2.8.3 43 | - Allow arbitrary atoms in record_opt_name() 44 | * 2.8.2 45 | - Encode atom `nil` as "null" for Elixir consumers 46 | * 2.8.1 47 | - Support 'object' as custom type properties 48 | * 2.8.0 49 | - Improve varint encoding performance 50 | - Support erlang map for avro record and map type in encoder 51 | - Allow decoder to decode avro record and map type as erlang map 52 | based on `record_type` and `map_type` options. 53 | * 2.7.1 54 | - Issue #81 allow missing codec in ocf 55 | * 2.7.0 56 | - Add schema compatibility check `avro:is_compatible/2` 57 | * 2.6.5 58 | - Refine macros for `get_stacktrace` deprecation 59 | * 2.6.4 60 | - Do not conditionally export functions 61 | * 2.6.3 62 | - Upgrade jsone from 1.4.3 to 1.4.6 (for OTP-21) 63 | * 2.6.2 64 | - Support Erlang 21.0 stacktraces 65 | * 2.6.1 66 | - Allow `"null"` string as default value for 'null' type in union type record fields (contributor bka9) 67 | * 2.6.0 68 | - Support Parsing Canonical Form for Schemas (contributor congini/@reachfh) 69 | * 2.5.0 70 | - Add an Erlang implementation of the Avro CRC 64 fingerprint algorithm, avro:crc64_fingerprint/1. 
71 | - Add `avro:make_simple_decoder/2` and `avro:make_simple_encoder/2` to simplify most common use cases. 72 | `avro:make_decoder/2` and `avro:make_encoder/2` are not deprecated and can still be used when 73 | there is a need to use the same decoder/encoder for multiple schemas. 74 | - Remove fullname-derived-namespace = namespace-in-type-attributes assertion (#60) 75 | - Add avro_ocf:make_ocf/2 to allow building ocf content in memory 76 | * 2.4.0 77 | - Support dict schema store, avro:make_decoder and avro:make_encoder accept JSON directly 78 | - Fix default value encode/decode and validation 79 | Encoder now makes use of record field default value in case the field is missing in input 80 | - Type reference validation options for avro:decode_schema/2 81 | - Changed default build from rebar to rebar3 82 | -------------------------------------------------------------------------------- /scripts/cover-print-not-covered-lines.escript: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env escript 2 | %% -*- erlang -*- 3 | %%! -pa _build/default/lib/erlavro/ebin 4 | 5 | %%% 6 | %%% Copyright (c) 2015-2017, Klarna AB 7 | %%% 8 | %%% Licensed under the Apache License, Version 2.0 (the "License"); 9 | %%% you may not use this file except in compliance with the License. 10 | %%% You may obtain a copy of the License at 11 | %%% 12 | %%% http://www.apache.org/licenses/LICENSE-2.0 13 | %%% 14 | %%% Unless required by applicable law or agreed to in writing, software 15 | %%% distributed under the License is distributed on an "AS IS" BASIS, 16 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | %%% See the License for the specific language governing permissions and 18 | %%% limitations under the License. 19 | %%% 20 | 21 | -mode(compile). 
22 | 23 | main([]) -> 24 | io:format(user, "expecting at least one coverdata file\n", []), 25 | halt(1); 26 | main(Files) -> 27 | ok = import_coverdata(Files), 28 | Modules = get_imported_modules(), 29 | Result = [{Mod, analyse_module(Mod)} || Mod <- Modules], 30 | lists:foreach(fun({Module, NotCoveredLines}) -> 31 | print_mod_summary(Module, lists:sort(NotCoveredLines)) 32 | end, Result). 33 | 34 | import_coverdata([]) -> ok; 35 | import_coverdata([Filename | Rest]) -> 36 | io:format(user, "Importing coverdata file: ~s\n", [Filename]), 37 | Parent = self(), 38 | Ref = make_ref(), 39 | erlang:spawn_link( 40 | fun() -> 41 | %% shutup the chatty prints from cover:xxx calls 42 | {ok, F} = file:open("/dev/null", [write]), 43 | group_leader(F, self()), 44 | ok = cover:import(Filename), 45 | Parent ! {ok, Ref}, 46 | %% keep it alive 47 | receive stop -> 48 | exit(normal) 49 | end 50 | end), 51 | receive 52 | {ok, Ref} -> 53 | import_coverdata(Rest) 54 | end. 55 | 56 | get_imported_modules() -> 57 | All = cover:imported_modules(), 58 | Filtered = 59 | lists:filter( 60 | fun(Mod) -> 61 | case lists:reverse(atom_to_list(Mod)) of 62 | "ETIUS_" ++ _ -> false; %% ignore coverage for xxx_SUITE 63 | _ -> true 64 | end 65 | end, All), 66 | lists:sort(Filtered). 67 | 68 | analyse_module(Module) -> 69 | {ok, Lines} = cover:analyse(Module, coverage, line), 70 | lists:foldr( 71 | fun({{_Mod, 0}, _}, Acc) -> Acc; 72 | ({{_Mod, _Line}, {1, 0}}, Acc) -> Acc; 73 | ({{_Mod, Line}, {0, 1}}, Acc) -> [Line | Acc] 74 | end, [], Lines). 75 | 76 | print_mod_summary(_Module, []) -> ok; 77 | print_mod_summary(Module, NotCoveredLines) -> 78 | io:format(user, "================ ~p ================\n", [Module]), 79 | case whicherl(Module) of 80 | Filename when is_list(Filename) -> 81 | print_lines(Filename, NotCoveredLines); 82 | _ -> 83 | erlang:error({erl_file_not_found, Module}) 84 | end. 

%% Print the given (ascending) line numbers of the source file Filename.
%% The file is not even opened when there is nothing to print.
print_lines(_Filename, []) ->
  ok;
print_lines(Filename, Wanted) ->
  {ok, Fd} = file:open(Filename, [read]),
  try
    print_lines(Fd, 1, Wanted)
  after
    file:close(Fd)
  end.

%% Walk the file line by line; N is the number of the line about to be read.
%% A line is echoed when its number is at the head of the pending list.
%% Reaching end-of-file with numbers still pending is an error.
print_lines(_Fd, _N, []) ->
  ok;
print_lines(Fd, N, [Next | Remaining] = Pending) ->
  case io:get_line(Fd, "") of
    eof ->
      erlang:error({eof, N, Pending});
    Text when N =:= Next ->
      io:format(user, "~5p: ~s", [N, Text]),
      print_lines(Fd, N + 1, Remaining);
    _Skipped ->
      print_lines(Fd, N + 1, Pending)
  end.

%% Source file path recorded in the compile info of Module's beam file.
whicherl(Module) when is_atom(Module) ->
  BeamFile = code:which(Module),
  {ok, {Module, [{compile_info, CompileInfo}]}} =
    beam_lib:chunks(BeamFile, [compile_info]),
  proplists:get_value(source, CompileInfo).

%%%_* Emacs ====================================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:
--------------------------------------------------------------------------------
/test/avro_fingerprint_tests.erl:
--------------------------------------------------------------------------------
%% coding: latin-1
%%%-------------------------------------------------------------------
%%% Copyright (c) 2018 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License. You may obtain
%%% a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied. See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%-------------------------------------------------------------------
-module(avro_fingerprint_tests).

-include_lib("eunit/include/eunit.hrl").

% Run Java test cases from the Avro project:
% https://github.com/apache/avro/blob/master/share/test/data/schema-tests.txt
% The Java tests use literal signed integers, so we convert to binary when
% comparing so we can use the same literal.

%% Encode an integer as a 64-bit binary. A negative (signed) literal and the
%% unsigned integer with the same low 64 bits produce the same binary, which
%% is what lets the tests below reuse the signed Java literals.
%% NOTE(review): the segment spec was lost when this file was extracted;
%% 64 bits is inferred from the CRC-64 fingerprint being compared.
bin(Value) ->
  <<Value:64>>.

%% CRC-64 fingerprint of Bin, in the same binary form as bin/1.
crc64(Bin) ->
  bin(avro:crc64_fingerprint(Bin)).

java_000_test() ->
  ?assertEqual(bin(7195948357588979594), crc64(<<"\"null\"">>)).

java_002_test() ->
  ?assertEqual(bin(-6970731678124411036), crc64(<<"\"boolean\"">>)).

java_004_test() ->
  ?assertEqual(bin(8247732601305521295), crc64(<<"\"int\"">>)).

java_006_test() ->
  ?assertEqual(bin(-3434872931120570953), crc64(<<"\"long\"">>)).

java_010_test() ->
  ?assertEqual(bin(-8181574048448539266), crc64(<<"\"double\"">>)).

java_012_test() ->
  ?assertEqual(bin(5746618253357095269), crc64(<<"\"bytes\"">>)).

java_014_test() ->
  ?assertEqual(bin(-8142146995180207161), crc64(<<"\"string\"">>)).

java_016_test() ->
  ?assertEqual(bin(-1241056759729112623), crc64(<<"[]">>)).

java_017_test() ->
  ?assertEqual(bin(-5232228896498058493), crc64(<<"[\"int\"]">>)).

java_018_test() ->
  ?assertEqual(bin(5392556393470105090), crc64(<<"[\"int\",\"boolean\"]">>)).

java_019_test() ->
  ?assertEqual(bin(-4824392279771201922),
               crc64(<<"{\"name\":\"foo\",\"type\":\"record\",\"fields\":[]}">>)).

java_020_test() ->
  ?assertEqual(bin(5916914534497305771),
               crc64(<<"{\"name\":\"x.y.foo\",\"type\":\"record\",\"fields\":[]}">>)).

java_021_test() ->
  ?assertEqual(bin(-4616218487480524110),
               crc64(<<"{\"name\":\"a.b.foo\",\"type\":\"record\",\"fields\":[]}">>)).

%% Each case below pairs a canonical schema text with the fingerprint
%% literal expected for it.

java_022_test() ->
  Schema = <<"{\"name\":\"foo\",\"type\":\"record\",\"fields\":[]}">>,
  ?assertEqual(bin(-4824392279771201922), crc64(Schema)).

java_025_test() ->
  Schema = <<"{\"name\":\"foo\",\"type\":\"record\",\"fields\":["
             "{\"name\":\"f1\",\"type\":\"boolean\"}]}">>,
  ?assertEqual(bin(7843277075252814651), crc64(Schema)).

java_026_test() ->
  Schema = <<"{\"name\":\"foo\",\"type\":\"record\",\"fields\":["
             "{\"name\":\"f1\",\"type\":\"boolean\"},"
             "{\"name\":\"f2\",\"type\":\"int\"}]}">>,
  ?assertEqual(bin(-4860222112080293046), crc64(Schema)).

java_027_test() ->
  Schema = <<"{\"name\":\"foo\",\"type\":\"enum\",\"symbols\":[\"A1\"]}">>,
  ?assertEqual(bin(-6342190197741309591), crc64(Schema)).

java_028_test() ->
  Schema = <<"{\"name\":\"x.y.z.foo\",\"type\":\"enum\",\"symbols\":["
             "\"A1\",\"A2\"]}">>,
  ?assertEqual(bin(-4448647247586288245), crc64(Schema)).

java_029_test() ->
  Schema = <<"{\"name\":\"foo\",\"type\":\"fixed\",\"size\":15}">>,
  ?assertEqual(bin(1756455273707447556), crc64(Schema)).

java_030_test() ->
  Schema = <<"{\"name\":\"x.y.z.foo\",\"type\":\"fixed\","
             "\"size\":32}">>,
  ?assertEqual(bin(-3064184465700546786), crc64(Schema)).

java_031_test() ->
  Schema = <<"{\"type\":\"array\",\"items\":\"null\"}">>,
  ?assertEqual(bin(-589620603366471059), crc64(Schema)).

java_032_test() ->
  Schema = <<"{\"type\":\"map\",\"values\":\"string\"}">>,
  ?assertEqual(bin(-8732877298790414990), crc64(Schema)).

java_033_test() ->
  Schema = <<"{\"name\":\"PigValue\",\"type\":\"record\",\"fields\":["
             "{\"name\":\"value\",\"type\":["
             "\"null\",\"int\",\"long\",\"PigValue\"]}]}">>,
  ?assertEqual(bin(-1759257747318642341), crc64(Schema)).

%% Fingerprint the decimal text of 1..10000; results are not checked.
for_coverage_test() ->
  Fingerprint = fun(I) -> crc64(integer_to_binary(I)) end,
  lists:foreach(Fingerprint, lists:seq(1, 10000)).

%%%_* Emacs ====================================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:
--------------------------------------------------------------------------------
/src/avro_fixed.erl:
--------------------------------------------------------------------------------
%%%-----------------------------------------------------------------------------
%%%
%%% Copyright (c) 2013-2017 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License. You may obtain
%%% a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied. See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%
%%% @author Ilya Staheev
%%% @doc Avro fixed type implementation.
%%% Internal data for fixed values is a binary value.
%%% @end
%%%-----------------------------------------------------------------------------

-module(avro_fixed).

%% API
-export([ cast/2
        , get_size/1
        , get_value/1
        , new/2
        , resolve_fullname/2
        , type/2
        , type/3
        ]).

-include("avro_internal.hrl").

%%%_* APIs =====================================================================

%% @doc Declare a fixed type without any extra type properties.
-spec type(name_raw(), pos_integer()) -> fixed_type().
type(Name, Size) ->
  type(Name, Size, []).

%% @doc Declare a fixed type.
%% `namespace' and `aliases' are read from the options; an error is raised
%% when the size is not a positive integer.
%% @end
-spec type(name_raw(), pos_integer(), type_props()) -> fixed_type().
type(Name0, Size, Opts) ->
  DeclaredNs = avro_util:get_opt(namespace, Opts, ?NS_GLOBAL),
  {Name, Ns} = avro:split_type_name(Name0, DeclaredNs),
  RawAliases = avro_util:get_opt(aliases, Opts, []),
  ok = avro_util:verify_aliases(RawAliases),
  ?ERROR_IF(not is_integer(Size) orelse Size < 1, {invalid_size, Size}),
  Aliases = avro_util:canonicalize_aliases(RawAliases, Ns),
  Fullname = avro:build_type_fullname(Name, Ns),
  Custom = avro_util:canonicalize_custom_props(Opts),
  FixedType = #avro_fixed_type{ name      = Name
                              , namespace = Ns
                              , aliases   = Aliases
                              , size      = Size
                              , fullname  = Fullname
                              , custom    = Custom
                              },
  ok = avro_util:verify_type(FixedType),
  FixedType.

%% @doc Re-resolve fullname and aliases against a newly discovered
%% enclosing namespace.
%% @end
-spec resolve_fullname(fixed_type(), namespace()) -> fixed_type().
resolve_fullname(#avro_fixed_type{} = T, Ns) ->
  Fullname = avro:build_type_fullname(T#avro_fixed_type.fullname, Ns),
  Aliases = avro_util:canonicalize_aliases(T#avro_fixed_type.aliases, Ns),
  T#avro_fixed_type{fullname = Fullname, aliases = Aliases}.

%% @doc Size recorded in the fixed type declaration.
-spec get_size(fixed_type()) -> pos_integer().
get_size(#avro_fixed_type{} = T) -> T#avro_fixed_type.size.

%% @doc Create a wrapped (boxed) value; raises an error when the value
%% can not be casted to the given type.
%% @end
-spec new(fixed_type(), avro:in()) -> avro_value() | no_return().
new(Type, Value) when ?IS_FIXED_TYPE(Type) ->
  case cast(Type, Value) of
    {error, Reason} -> erlang:error(Reason);
    {ok, Boxed}     -> Boxed
  end.

%% @doc Data held by the wrapped (boxed) value, not recursively unboxed.
-spec get_value(avro_value()) -> binary().
get_value(Value) when ?IS_FIXED_VALUE(Value) ->
  ?AVRO_VALUE_DATA(Value).

%% @doc Fixed values can be casted from other fixed values, from integers
%% or from binaries.
%% @end
-spec cast(avro_type(), term()) -> {ok, avro_value()} | {error, term()}.
cast(Type, Value) when ?IS_FIXED_TYPE(Type) ->
  do_cast(Type, Value).

%%%_* Internal functions =======================================================

%% @private Big-endian unsigned encoding of Integer, left-padded with zero
%% bytes up to Size bytes. Fails with a badmatch when Integer needs more
%% than Size bytes.
%% @end
-spec integer_to_fixed(pos_integer(), non_neg_integer()) -> binary().
integer_to_fixed(Size, Integer) ->
  Encoded = binary:encode_unsigned(Integer),
  PadBytes = Size - byte_size(Encoded),
  true = (PadBytes >= 0),
  <<0:(PadBytes * 8), Encoded/binary>>.

%% @private Integers are range-checked and converted to a binary first;
%% binaries must have exactly the declared size; anything else is rejected.
%% @end
do_cast(Type, Value) when is_integer(Value) ->
  Size = Type#avro_fixed_type.size,
  UpperBound = 1 bsl (8 * Size),
  if
    Value >= 0, Value < UpperBound ->
      do_cast(Type, integer_to_fixed(Size, Value));
    true ->
      {error, integer_out_of_range}
  end;
do_cast(Type, Value) when is_binary(Value) ->
  case Type#avro_fixed_type.size of
    Size when Size =:= byte_size(Value) -> {ok, ?AVRO_VALUE(Type, Value)};
    _Mismatch                           -> {error, bad_size}
  end;
do_cast(Type, Value) ->
  {error, {cast_error, Type, Value}}.

%%%_* Emacs ====================================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:
--------------------------------------------------------------------------------
/test/avro_decoder_hooks_tests.erl:
--------------------------------------------------------------------------------
%%%-------------------------------------------------------------------
%%% Copyright (c) 2013-2016 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License.
%%% You may obtain
%%% a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied. See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%
%%%-------------------------------------------------------------------
-module(avro_decoder_hooks_tests).

-include("avro_internal.hrl").
-include_lib("eunit/include/eunit.hrl").

%% One debug-hook scenario per supported encoding.
debug_hook_test_() ->
  [ {Title, fun() -> test_debug_hook(Encoding) end}
    || {Title, Encoding} <- [{"json", avro_json}, {"binary", avro_binary}]
  ].

%% Encode a record, corrupt the encoded payload, then decode it with the
%% debug-trace hook installed and expect the hook to raise.
test_debug_hook(Encoding) ->
  CodecOptions = [{encoding, Encoding}],
  Hook = avro_decoder_hooks:print_debug_trace(
           fun(IoData) -> io:put_chars(user, IoData) end, 10),
  Fields = [ define_field("f1", int)
           , define_field("f2", avro_union:type([null, int]))
           , define_field("f3", string)
           ],
  MyRecordType =
    avro_record:type("MyRecord", Fields, [{namespace, "com.example"}]),
  Store = avro_schema_store:add_type(MyRecordType, avro_schema_store:new([])),
  Encoder = avro:make_encoder(Store, CodecOptions),
  Input = [{"f1", 1}, {"f3", "my-string"}, {"f2", 32}],
  Encoded = iolist_to_binary(Encoder("com.example.MyRecord", Input)),
  %% Make a corrupted binary to decode
  Corrupted = corrupt_encoded(Encoding, Encoded),
  Decoder = avro:make_decoder(Store, [{hook, Hook} | CodecOptions]),
  ?assertException(_Class, {'$hook-raised', _},
                   Decoder("com.example.MyRecord", Corrupted)),
  ok.

%% Round-trip a record holding an array of unions through the binary codec
%% with the tag_unions hook installed, and check the decoded term: fixed and
%% record members come back paired with their type name, while null, array
%% and map members come back as plain values.
tag_unions_test() ->
  CodecOptions = [{encoding, avro_binary}],
  Hook = avro_decoder_hooks:tag_unions(),
  IntArray = avro_array:type(int),
  SubRecord =
    avro_record:type("MySubRec", [avro_record:define_field("rf1", int)]),
  Union = avro_union:type([ null
                          , avro_fixed:type("myfixed", 2)
                          , avro_map:type(int)
                          , IntArray
                          , SubRecord
                          ]),
  Fields = [ define_field("f1", IntArray)
           , define_field("f2", avro_array:type(Union))
           ],
  MyRecordType =
    avro_record:type("MyRecord", Fields, [{namespace, "com.example"}]),
  Store = avro_schema_store:add_type(MyRecordType, avro_schema_store:new([])),
  Encoder = avro:make_encoder(Store, CodecOptions),
  UnionMembers = [ null                                    %% null
                 , <<"32">>                                %% fixed
                 , {"com.example.MySubRec", [{"rf1", 42}]} %% record
                 , {"com.example.MySubRec", [{"rf1", 43}]} %% record
                 , [4, 5, 6]                               %% array
                 , [{"k", 1}]                              %% map
                 ],
  Input = [{"f1", [1, 2, 3]}, {"f2", UnionMembers}],
  Bin = iolist_to_binary(Encoder("com.example.MyRecord", Input)),
  Decoder = avro:make_decoder(Store, [{hook, Hook} | CodecOptions]),
  Expected =
    [ {<<"f1">>, [1, 2, 3]}
    , {<<"f2">>, [ null
                 , {<<"com.example.myfixed">>, <<"32">>}
                 , {<<"com.example.MySubRec">>, [{<<"rf1">>, 42}]}
                 , {<<"com.example.MySubRec">>, [{<<"rf1">>, 43}]}
                 , [4, 5, 6]
                 , [{<<"k">>, 1}]
                 ]}
    ],
  ?assertEqual(Expected, Decoder("com.example.MyRecord", Bin)).

%% @private Turn a valid encoded payload into one that must fail to decode.
corrupt_encoded(avro_binary, Bin) ->
  %% for binary format, chopping off the last byte should corrupt the data
  %% because the last element is a string, the missing byte
  %% should violate the encoded string length check
  BadSize = byte_size(Bin) - 1,
  binary:part(Bin, 0, BadSize);
corrupt_encoded(avro_json, Bin) ->
  %% for json, replace the last string with an integer
  %% to violate the type check
  binary:replace(Bin, <<"\"my-string\"">>, <<"42">>).

%% @private Shorthand for avro_record:define_field/2.
define_field(Name, Type) -> avro_record:define_field(Name, Type).

%%%_* Emacs ====================================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:
--------------------------------------------------------------------------------
/include/erlavro.hrl:
--------------------------------------------------------------------------------
%%%-----------------------------------------------------------------------------
%%% Copyright (c) 2013-2017 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License. You may obtain
%%% a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied. See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%-----------------------------------------------------------------------------

%% Include for decoder hook implementation.

-ifndef(_ERLAVRO_HRL_).
-define(_ERLAVRO_HRL_, true).
23 | 24 | %% Names of primitive types 25 | -define(AVRO_NULL, <<"null">>). 26 | -define(AVRO_BOOLEAN, <<"boolean">>). 27 | -define(AVRO_INT, <<"int">>). 28 | -define(AVRO_LONG, <<"long">>). 29 | -define(AVRO_FLOAT, <<"float">>). 30 | -define(AVRO_DOUBLE, <<"double">>). 31 | -define(AVRO_BYTES, <<"bytes">>). 32 | -define(AVRO_STRING, <<"string">>). 33 | %% Other reserved types names 34 | -define(AVRO_RECORD, <<"record">>). 35 | -define(AVRO_ENUM, <<"enum">>). 36 | -define(AVRO_ARRAY, <<"array">>). 37 | -define(AVRO_MAP, <<"map">>). 38 | -define(AVRO_UNION, <<"union">>). 39 | -define(AVRO_FIXED, <<"fixed">>). 40 | 41 | -define(IS_AVRO_PRIMITIVE_NAME(N), 42 | (N =:= ?AVRO_NULL orelse 43 | N =:= ?AVRO_BOOLEAN orelse 44 | N =:= ?AVRO_INT orelse 45 | N =:= ?AVRO_LONG orelse 46 | N =:= ?AVRO_STRING orelse 47 | N =:= ?AVRO_FLOAT orelse 48 | N =:= ?AVRO_DOUBLE orelse 49 | N =:= ?AVRO_BYTES)). 50 | 51 | -define(AVRO_REQUIRED, erlang:error({required_field_missed, ?LINE})). 52 | 53 | -define(AVRO_NS_GLOBAL, <<"">>). 54 | -define(AVRO_NO_DOC, <<"">>). 55 | 56 | -record(avro_primitive_type, 57 | { name = ?AVRO_REQUIRED :: avro:name() 58 | , custom = [] :: [avro:custom_prop()] 59 | }). 60 | 61 | -record(avro_record_type, 62 | { name = ?AVRO_REQUIRED :: avro:name() 63 | , namespace = ?AVRO_NS_GLOBAL :: avro:namespace() 64 | , doc = ?AVRO_NO_DOC :: avro:typedoc() 65 | , aliases = [] :: [avro:name()] 66 | , fields = ?AVRO_REQUIRED :: [avro:record_field()] 67 | , fullname = ?AVRO_REQUIRED :: avro:fullname() 68 | , custom = [] :: [avro:custom_prop()] 69 | }). 70 | 71 | -record(avro_enum_type, 72 | { name = ?AVRO_REQUIRED :: avro:name() 73 | , namespace = ?AVRO_NS_GLOBAL :: avro:namespace() 74 | , aliases = [] :: [avro:name()] 75 | , doc = ?AVRO_NO_DOC :: avro:typedoc() 76 | , symbols = ?AVRO_REQUIRED :: [avro:enum_symbol()] 77 | , fullname = ?AVRO_REQUIRED :: avro:fullname() 78 | , custom = [] :: [avro:custom_prop()] 79 | }). 
80 | 81 | -record(avro_array_type, 82 | { type = ?AVRO_REQUIRED :: avro:type_or_name() 83 | , custom = [] :: [avro:custom_prop()] 84 | }). 85 | 86 | -record(avro_map_type, 87 | { type = ?AVRO_REQUIRED :: avro:type_or_name() 88 | , custom = [] :: [avro:custom_prop()] 89 | }). 90 | 91 | -record(avro_union_type, 92 | { id2type = ?AVRO_REQUIRED :: avro_union:id2type() 93 | , name2id = ?AVRO_REQUIRED :: avro_union:name2id() 94 | }). 95 | 96 | -record(avro_fixed_type, 97 | { name = ?AVRO_REQUIRED :: avro:name() 98 | , namespace = ?AVRO_NS_GLOBAL :: avro:namespace() 99 | , aliases = [] :: [avro:name()] 100 | , size = ?AVRO_REQUIRED :: pos_integer() 101 | , fullname = ?AVRO_REQUIRED :: avro:fullname() 102 | , custom = [] :: [avro:custom_prop()] 103 | }). 104 | 105 | -record(avro_value, 106 | { type :: avro:type_or_name() 107 | , data :: avro:avro_value() 108 | }). 109 | 110 | -type avro_value() :: avro:canonicalized_value() %% primitive, fixed, enum 111 | | #avro_value{} %% union 112 | | [#avro_value{}] %% array 113 | | [{avro:name(), #avro_value{}}] %% record 114 | | avro_map:data() %% map 115 | | {json, binary()} %% serialized 116 | | {binary, binary()}. %% serialized 117 | 118 | -define(IS_AVRO_VALUE(Value), is_record(Value, avro_value)). 119 | -define(AVRO_VALUE(Type,Data), #avro_value{type = Type, data = Data}). 120 | -define(AVRO_VALUE_TYPE(Value), Value#avro_value.type). 121 | -define(AVRO_VALUE_DATA(Value), Value#avro_value.data). 122 | 123 | -type avro_encoding() :: avro_json | avro_binary. 124 | 125 | -define(AVRO_ENCODED_VALUE_JSON(Type, Value), 126 | ?AVRO_VALUE(Type, {json, Value})). 127 | -define(AVRO_ENCODED_VALUE_BINARY(Type, Value), 128 | ?AVRO_VALUE(Type, {binary, Value})). 129 | 130 | -define(AVRO_DEFAULT_DECODER_HOOK, 131 | fun(__Type__, __SubNameOrId__, Data, DecodeFun) -> DecodeFun(Data) end). 132 | 133 | -endif. 
134 | 135 | %%%_* Emacs ==================================================================== 136 | %%% Local Variables: 137 | %%% allout-layout: t 138 | %%% erlang-indent-level: 2 139 | %%% End: 140 | -------------------------------------------------------------------------------- /src/avro_map.erl: -------------------------------------------------------------------------------- 1 | %%%----------------------------------------------------------------------------- 2 | %%% 3 | %%% Copyright (c) 2013-2018 Klarna AB 4 | %%% 5 | %%% This file is provided to you under the Apache License, 6 | %%% Version 2.0 (the "License"); you may not use this file 7 | %%% except in compliance with the License. You may obtain 8 | %%% a copy of the License at 9 | %%% 10 | %%% http://www.apache.org/licenses/LICENSE-2.0 11 | %%% 12 | %%% Unless required by applicable law or agreed to in writing, 13 | %%% software distributed under the License is distributed on an 14 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %%% KIND, either express or implied. See the License for the 16 | %%% specific language governing permissions and limitations 17 | %%% under the License. 18 | %%% 19 | %%% @author Ilya Staheev 20 | %%% @doc Handling of Avro maps. 21 | %%% Data are kept internally as a #{binary() -> avro_value()} 22 | %%% @end 23 | %%%----------------------------------------------------------------------------- 24 | 25 | -module(avro_map). 26 | 27 | %% API 28 | -export([ cast/2 29 | , encode/3 30 | , get_items_type/1 31 | , new/2 32 | , resolve_fullname/2 33 | , to_list/1 34 | , to_term/1 35 | , type/1 36 | , type/2 37 | , update_items_type/2 38 | ]). 39 | 40 | -export_type([data/0]). 41 | 42 | -include("avro_internal.hrl"). 43 | 44 | -define(IS_IOLIST(S), (is_list(S) orelse is_binary(S))). 45 | 46 | -type key() :: binary(). 47 | -type value() :: avro_value(). 48 | -type data() :: #{key() => value()}. 49 | 50 | -type input_key() :: avro:name_raw(). 
-type input_value() :: avro:in().
-type input_data() :: [{input_key(), input_value()}] |
                      #{input_key() => input_value()}.

%%%_* APIs =====================================================================

%% @doc Define a map type without custom properties.
-spec type(avro_type()) -> map_type().
type(Type) -> type(Type, []).

%% @doc Define a map type carrying the given custom properties.
-spec type(avro_type(), [custom_prop()]) -> map_type().
type(Type, CustomProps) ->
  ItemsType = avro_util:canonicalize_type_or_name(Type),
  Custom = avro_util:canonicalize_custom_props(CustomProps),
  #avro_map_type{type = ItemsType, custom = Custom}.

%% @doc Resolve the fullname of the map-value type against a newly
%% discovered enclosing namespace.
%% @end
-spec resolve_fullname(map_type(), namespace()) -> map_type().
resolve_fullname(Map, Ns) ->
  update_items_type(Map, fun(T) -> avro:resolve_fullname(T, Ns) end).

%% @doc Return the map-value's type definition.
-spec get_items_type(map_type()) -> avro_type().
get_items_type(#avro_map_type{} = MapType) ->
  MapType#avro_map_type.type.

%% @doc Replace the items type with the result of applying the callback
%% to the current items type.
%% @end
-spec update_items_type(map_type(), fun((type_or_name()) -> type_or_name())) ->
        map_type().
update_items_type(#avro_map_type{} = T, F) ->
  T#avro_map_type{type = F(T#avro_map_type.type)}.

%% @doc Create a new typed (boxed) value.
%% Raise an 'error' in case of failure.
%% @end
-spec new(map_type(), input_data()) -> avro_value() | no_return().
new(Type, Data) when ?IS_MAP_TYPE(Type) ->
  case cast(Type, Data) of
    {error, Reason} -> erlang:error(Reason);
    {ok, Boxed}     -> Boxed
  end.

%% @doc Recursively unbox the wrapped avro_value().
-spec to_term(avro_value()) -> [{key(), value()}].
to_term(Map) ->
  lists:map(fun({K, V}) -> {K, avro:to_term(V)} end, to_list(Map)).

%% @hidden Return the typed value as a kv-list.
%% NOTE: The value is not recursively unboxed as what to_term/1 does.
%% @end
-spec to_list(avro_value()) -> [{key(), avro_value()}].
to_list(Value) when ?IS_MAP_VALUE(Value) ->
  Pairs = maps:to_list(?AVRO_VALUE_DATA(Value)),
  lists:keysort(1, Pairs).

%% @hidden Value is other Avro map value or a kv-list with iolist keys.
-spec cast(avro_type(), input_data()) -> {ok, avro_value()} | {error, term()}.
cast(Type, Value) when ?IS_MAP_TYPE(Type) ->
  do_cast(Type, Value).

%% @hidden Encode every entry with EncodeFun(ItemsType, Key, Value).
%% A failure inside EncodeFun is re-raised with the map type and the
%% offending key attached as context.
%% @end
-spec encode(type_or_name(), input_data(), fun()) -> iolist().
encode(Type, Value, EncodeFun) when is_map(Value) ->
  encode(Type, maps:to_list(Value), EncodeFun);
encode(Type, Value, EncodeFun) ->
  ItemsType = avro_map:get_items_type(Type),
  EncodeEntry =
    fun({Key, Item}) ->
      try
        EncodeFun(ItemsType, Key, Item)
      catch
        Class : Reason : Stacktrace ->
          ?RAISE_ENC_ERR(Class, Reason, [{map, Type},
                                         {key, Key}], Stacktrace)
      end
    end,
  lists:map(EncodeEntry, Value).

%%%_* Internal Functions =======================================================

%% @private Cast every entry of a kv-list (or map) to the items type.
%% The first entry that fails to cast aborts the whole cast with its reason.
%% @end
-spec do_cast(map_type(), input_data()) ->
        {ok, avro_value()} | {error, any()}.
do_cast(Type, Map) when is_map(Map) ->
  do_cast(Type, maps:to_list(Map));
do_cast(Type, KvList0) when is_list(KvList0) ->
  #avro_map_type{type = ItemsType} = Type,
  CastEntry =
    fun({K, V}) ->
      Key = ?NAME(K),
      case avro:cast(ItemsType, V) of
        {ok, Casted}  -> {Key, Casted};
        {error, Why}  -> throw({?MODULE, Why})
      end
    end,
  try lists:map(CastEntry, KvList0) of
    Entries ->
      {ok, ?AVRO_VALUE(Type, maps:from_list(Entries))}
  catch
    throw : {?MODULE, Reason} ->
      {error, Reason}
  end.

%%%_* Emacs ====================================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:
--------------------------------------------------------------------------------
/src/avro_array.erl:
--------------------------------------------------------------------------------
%%%-----------------------------------------------------------------------------
%%% Copyright (c) 2013-2018 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License. You may obtain
%%% a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied. See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%
%%% @author Ilya Staheev
%%%-----------------------------------------------------------------------------
-module(avro_array).

%% API
-export([ cast/2
        , encode/3
        , get_items/1
        , get_items_type/1
        , new/1
        , new/2
        , prepend/2
        , resolve_fullname/2
        , to_term/1
        , type/1
        , type/2
        , update_items_type/2
        ]).

%% API to be used only inside erlavro
-export([new_direct/2]).

-include("avro_internal.hrl").

%%%_* APIs =====================================================================

%% @doc Define array type without custom properties.
-spec type(type_or_name()) -> array_type().
type(Type) -> type(Type, []).

%% @doc Define array type with custom properties.
-spec type(type_or_name(), [custom_prop()]) -> array_type().
type(Type, CustomProps) ->
  ItemsType = avro_util:canonicalize_type_or_name(Type),
  Custom = avro_util:canonicalize_custom_props(CustomProps),
  #avro_array_type{type = ItemsType, custom = Custom}.

%% @doc Resolve the fullname of the element type against the given namespace.
-spec resolve_fullname(array_type(), namespace()) -> array_type().
resolve_fullname(Array, Ns) ->
  Resolve = fun(T) -> avro:resolve_fullname(T, Ns) end,
  update_items_type(Array, Resolve).

%% @doc Replace the element type with the result of applying the callback
%% to the current element type.
%% @end
-spec update_items_type(array_type(),
                        fun((type_or_name()) -> type_or_name())) ->
        array_type().
update_items_type(#avro_array_type{} = T, F) ->
  T#avro_array_type{type = F(T#avro_array_type.type)}.

%% @doc Get array element type.
-spec get_items_type(array_type()) -> avro_type().
get_items_type(ArrayType) when ?IS_ARRAY_TYPE(ArrayType) ->
  #avro_array_type{type = ItemsType} = ArrayType,
  ItemsType.

%% @doc Create a wrapped (boxed) empty array avro value.
-spec new(array_type()) -> avro_value().
new(Type) ->
  new(Type, []).

%% @doc Create a wrapped (boxed) avro value with given array data.
%% Raises an error when an element can not be casted to the element type.
%% @end
-spec new(array_type(), [term()]) -> avro_value() | no_return().
new(Type, List) when ?IS_ARRAY_TYPE(Type) ->
  case cast(Type, List) of
    {error, Reason} -> erlang:error(Reason);
    {ok, Boxed}     -> Boxed
  end.

%% @doc Special optimized version of new which assumes that all items in List
%% have been already casted to items type of the array, so we can skip checking
%% types one more time during casting. Should only be used inside erlavro.
%% @end
-spec new_direct(array_type(), [avro:in()]) -> avro_value().
new_direct(Type, List) when ?IS_ARRAY_TYPE(Type) ->
  ?AVRO_VALUE(Type, List).

%% @doc Returns array contents as a list of avro values.
-spec get_items(avro_value()) -> [avro_value()].
get_items(Value) when ?IS_ARRAY_VALUE(Value) ->
  ?AVRO_VALUE_DATA(Value).

%% @doc Prepend elements to the array.
%% The new elements are casted to the array's element type first; a failed
%% cast crashes the caller.
%% @end
-spec prepend([term()], avro_value()) -> avro_value() | no_return().
prepend(Items0, Value) when ?IS_ARRAY_VALUE(Value) ->
  Type = ?AVRO_VALUE_TYPE(Value),
  Existing = ?AVRO_VALUE_DATA(Value),
  {ok, Casted} = cast_items(Type#avro_array_type.type, Items0, []),
  new_direct(Type, Casted ++ Existing).

%% @hidden Only other Avro array type or erlang list can be casted to arrays.
-spec cast(array_type(), [avro:in()]) -> {ok, avro_value()} | {error, term()}.
cast(Type, Value) when ?IS_ARRAY_TYPE(Type) ->
  do_cast(Type, Value).

%% @hidden Recursively unbox typed value.
-spec to_term(avro_value()) -> list().
to_term(Array) when ?IS_ARRAY_VALUE(Array) ->
  lists:map(fun avro:to_term/1, ?AVRO_VALUE_DATA(Array)).

%% @hidden Encoder help function. For internal use only.
%% Applies EncodeFun(ItemsType, Element) to every element, in order.
%% @end
-spec encode(type_or_name(), list(), fun()) -> list().
encode(Type, Value, EncodeFun) ->
  ItemsType = avro_array:get_items_type(Type),
  EncodeOne = fun(Element) -> EncodeFun(ItemsType, Element) end,
  lists:map(EncodeOne, Value).

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% @private Cast all elements of a list and box the result.
-spec do_cast(array_type(), [avro:in()]) ->
        {ok, avro_value()} | {error, term()}.
do_cast(Type, Items) when is_list(Items) ->
  case cast_items(Type#avro_array_type.type, Items, []) of
    {ok, Casted}       -> {ok, ?AVRO_VALUE(Type, Casted)};
    {error, _} = Error -> Error
  end.

%% @private
-spec cast_items(avro_type(), [term()], [avro_value()]) ->
        {ok, [avro_value()]} | {error, any()}.
%% Cast items one by one, accumulating in reverse; stop at the first result
%% of avro:cast/2 that is not `{ok, _}' and return it unchanged.
cast_items(ItemsType, [Item | Rest], Casted) ->
  case avro:cast(ItemsType, Item) of
    {ok, Boxed} -> cast_items(ItemsType, Rest, [Boxed | Casted]);
    Failure     -> Failure
  end;
cast_items(_ItemsType, [], Casted) ->
  {ok, lists:reverse(Casted)}.

%%%_* Emacs ============================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:
--------------------------------------------------------------------------------
/src/avro_primitive.erl:
--------------------------------------------------------------------------------
%%%-----------------------------------------------------------------------------
%%% Copyright (c) 2013-2017 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License. You may obtain
%%% a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied. See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%
%%% @author Ilya Staheev
%%% @doc
%%%
%%% @end
%%%-----------------------------------------------------------------------------

-module(avro_primitive).

%% API
-export([ type/2
        , boolean_type/0
        , bytes_type/0
        , double_type/0
        , float_type/0
        , int_type/0
        , long_type/0
        , string_type/0
        , null_type/0
        ]).

-export([ boolean/1
        , bytes/1
        , cast/2
        , double/1
        , float/1
        , get_value/1
        , int/1
        , long/1
        , null/0
        , string/1
        ]).
-include("avro_internal.hrl").

%%%===================================================================
%%% API: Types
%%%===================================================================

%% Build a primitive type record for the given (raw) name.
%% Raises `{unknown_name, Name}' when the canonicalized name is not one of
%% the primitive type names.
-spec type(name_raw(), [custom_prop()]) -> primitive_type().
type(RawName, Props) ->
  Name = ?NAME(RawName),
  case ?IS_AVRO_PRIMITIVE_NAME(Name) of
    false ->
      erlang:error({unknown_name, Name});
    true ->
      #avro_primitive_type{ name   = Name
                          , custom = avro_util:canonicalize_custom_props(Props)
                          }
  end.

%% Shorthands: each returns the named primitive type without custom
%% properties.

null_type() ->
  type(?AVRO_NULL, []).

boolean_type() ->
  type(?AVRO_BOOLEAN, []).

int_type() ->
  type(?AVRO_INT, []).

long_type() ->
  type(?AVRO_LONG, []).

float_type() ->
  type(?AVRO_FLOAT, []).

double_type() ->
  type(?AVRO_DOUBLE, []).

bytes_type() ->
  type(?AVRO_BYTES, []).

string_type() ->
  type(?AVRO_STRING, []).

%%%===================================================================
%%% API: Casting
%%%===================================================================

-spec cast(avro_type(), term()) -> {ok, avro_value()} | {error, term()}.
%% Cast a raw Erlang term to a boxed value of the given primitive type.
%% Returns `{ok, Value}' when a clause below accepts the input, otherwise
%% `{error, {type_mismatch, Type, Value}}'.

%% null: `null' is stored as is.
cast(Type, null) when ?IS_NULL_TYPE(Type) ->
  {ok, ?AVRO_VALUE(Type, null)};
% For Elixir compatibility
cast(Type, nil) when ?IS_NULL_TYPE(Type) ->
  {ok, ?AVRO_VALUE(Type, null)};
cast(Type, Value) when ?IS_BOOLEAN_TYPE(Type) andalso
                       is_boolean(Value) ->
  {ok, ?AVRO_VALUE(Type, Value)};
%% int/long: is_integer/1 is required in addition to the range check.
%% Erlang compares integers and floats numerically, so a range check alone
%% would let a float such as 1.5 through and box it as an int/long.
cast(Type, Value) when ?IS_INT_TYPE(Type) andalso
                       is_integer(Value) andalso
                       Value >= ?INT4_MIN andalso
                       Value =< ?INT4_MAX ->
  {ok, ?AVRO_VALUE(Type, Value)};
cast(Type, Value) when ?IS_LONG_TYPE(Type) andalso
                       is_integer(Value) andalso
                       Value >= ?INT8_MIN andalso
                       Value =< ?INT8_MAX ->
  {ok, ?AVRO_VALUE(Type, Value)};
%% float/double: integers are accepted and converted with erlang:float/1.
cast(Type, Value) when ?IS_FLOAT_TYPE(Type) andalso
                       is_integer(Value) ->
  {ok, ?AVRO_VALUE(Type, erlang:float(Value))};
cast(Type, Value) when ?IS_FLOAT_TYPE(Type) andalso
                       is_float(Value) ->
  {ok, ?AVRO_VALUE(Type, Value)};
cast(Type, Value) when ?IS_DOUBLE_TYPE(Type) andalso
                       is_integer(Value) ->
  {ok, ?AVRO_VALUE(Type, erlang:float(Value))};
cast(Type, Value) when ?IS_DOUBLE_TYPE(Type) andalso
                       is_float(Value) ->
  {ok, ?AVRO_VALUE(Type, Value)};
cast(Type, Value) when ?IS_BYTES_TYPE(Type) andalso
                       is_binary(Value) ->
  {ok, ?AVRO_VALUE(Type, Value)};
%% string: lists and binaries are flattened to a binary.
cast(Type, Value) when ?IS_STRING_TYPE(Type) andalso
                       (is_list(Value) orelse is_binary(Value)) ->
  {ok, ?AVRO_VALUE(Type, erlang:iolist_to_binary(Value))};
% Encode atom values for string types when not null, nil, or booleans
cast(Type, Value) when ?IS_STRING_TYPE(Type) andalso
                       (is_atom(Value) andalso
                        (Value =/= null andalso Value =/= nil andalso
                         Value =/= true andalso Value =/= false)) ->
  {ok, ?AVRO_VALUE(Type, erlang:atom_to_binary(Value, utf8))};
cast(Type, Value) -> {error, {type_mismatch, Type, Value}}.
%%%===================================================================
%%% API: Helpers
%%%===================================================================

%% Each helper casts its argument to the matching primitive type and returns
%% the boxed value; a failed cast is raised as an error by from_cast/1.

null() ->
  Result = cast(null_type(), null),
  from_cast(Result).

boolean(Value) ->
  Result = cast(boolean_type(), Value),
  from_cast(Result).

int(Value) ->
  Result = cast(int_type(), Value),
  from_cast(Result).

long(Value) ->
  Result = cast(long_type(), Value),
  from_cast(Result).

float(Value) ->
  Result = cast(float_type(), Value),
  from_cast(Result).

double(Value) ->
  Result = cast(double_type(), Value),
  from_cast(Result).

bytes(Value) ->
  Result = cast(bytes_type(), Value),
  from_cast(Result).

string(Value) ->
  Result = cast(string_type(), Value),
  from_cast(Result).

%% Get underlying erlang value from an Avro primitive value
-spec get_value(avro_value()) -> canonicalized_value().
get_value(Boxed) when ?IS_PRIMITIVE_TYPE(?AVRO_VALUE_TYPE(Boxed)) ->
  ?AVRO_VALUE_DATA(Boxed).

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% @private Unwrap a cast result, raising the reason on failure.
from_cast({error, Reason}) -> erlang:error(Reason);
from_cast({ok, Boxed})     -> Boxed.

%%%_* Emacs ============================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:
--------------------------------------------------------------------------------
/src/avro_enum.erl:
--------------------------------------------------------------------------------
%%%-----------------------------------------------------------------------------
%%% Copyright (c) 2013-2017 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License. You may obtain
%%% a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied. See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%
%%% @author Ilya Staheev
%%% @doc Avro enum type implementation.
%%%
%%% All symbols in an enum must be unique; duplicates are prohibited.
%%%
%%% Internal data for an enum is the symbol string itself.
%%% @end
%%%-----------------------------------------------------------------------------
-module(avro_enum).

%% API
-export([ cast/2
        , get_index/1
        , get_index/2
        , get_symbol_from_index/2
        , get_value/1
        , new/2
        , resolve_fullname/2
        , type/2
        , type/3
        ]).

-include("avro_internal.hrl").

-type symbol() :: avro:enum_symbol().
-type symbol_raw() :: avro:enum_symbol_raw().
-type index() :: avro:enum_index().

-define(SYMBOL(X), avro_util:canonicalize_name(X)).
-define(IS_SYMBOL_RAW(T), (is_atom(T) orelse is_list(T) orelse is_binary(T))).

%%%=============================================================================
%%% API
%%%=============================================================================

%% @doc Declare a enum type with default properties.
-spec type(name_raw(), [symbol_raw()]) -> enum_type() | no_return().
type(RawName, RawSymbols) ->
  NoOpts = [],
  type(RawName, RawSymbols, NoOpts).

%% @doc Declare a enum type.
-spec type(name_raw(), [symbol_raw()], avro:type_props()) ->
        enum_type() | no_return().
%% Steps, in order: split the name into name and namespace, canonicalize and
%% check the symbols, verify and canonicalize the aliases, build the record,
%% and finally verify the resulting type.
type(RawName, RawSymbols, Opts) ->
  DefaultNs = avro_util:get_opt(namespace, Opts, ?NS_GLOBAL),
  {Name, Ns} = avro:split_type_name(RawName, DefaultNs),
  Canonicalize = fun(Raw) -> ?SYMBOL(Raw) end,
  Symbols = lists:map(Canonicalize, RawSymbols),
  ok = check_symbols(Symbols),
  Doc = avro_util:get_opt(doc, Opts, ?NO_DOC),
  RawAliases = avro_util:get_opt(aliases, Opts, []),
  ok = avro_util:verify_aliases(RawAliases),
  Aliases = avro_util:canonicalize_aliases(RawAliases, Ns),
  EnumType =
    #avro_enum_type{ name      = Name
                   , namespace = Ns
                   , aliases   = Aliases
                   , doc       = ?DOC(Doc)
                   , symbols   = Symbols
                   , fullname  = avro:build_type_fullname(Name, Ns)
                   , custom    = avro_util:canonicalize_custom_props(Opts)
                   },
  ok = avro_util:verify_type(EnumType),
  EnumType.

%% @doc Resolve fullname by newly discovered enclosing namespace.
%% Both the fullname and the aliases are rebuilt against `Ns'.
-spec resolve_fullname(enum_type(), namespace()) -> enum_type().
resolve_fullname(#avro_enum_type{} = EnumType, Ns) ->
  #avro_enum_type{fullname = OldFullname, aliases = OldAliases} = EnumType,
  Fullname = avro:build_type_fullname(OldFullname, Ns),
  Aliases = avro_util:canonicalize_aliases(OldAliases, Ns),
  EnumType#avro_enum_type{fullname = Fullname, aliases = Aliases}.

%% @doc Create a enum wrapped (boxed) value.
%% Raises an error carrying the cast failure reason when cast/2 fails.
-spec new(enum_type(), avro_value() | symbol_raw()) ->
        avro_value() | no_return().
new(EnumType, Input) when ?IS_ENUM_TYPE(EnumType) ->
  case cast(EnumType, Input) of
    {error, Reason} -> erlang:error(Reason);
    {ok, Boxed}     -> Boxed
  end.

%% @doc Get the enum symbol.
-spec get_value(avro_value()) -> symbol().
get_value(Boxed) when ?IS_ENUM_VALUE(Boxed) ->
  ?AVRO_VALUE_DATA(Boxed).

%% @doc Get symbol index from boxed value.
-spec get_index(avro_value()) -> index().
get_index(Boxed) when ?IS_ENUM_VALUE(Boxed) ->
  EnumType = ?AVRO_VALUE_TYPE(Boxed),
  get_index(EnumType, ?AVRO_VALUE_DATA(Boxed)).
%% @doc Get symbol index from type definition.
%% The symbol is canonicalized before being searched for, starting at index 0.
-spec get_index(enum_type(), symbol_raw()) -> index().
get_index(EnumType, RawSymbol) ->
  Wanted = ?SYMBOL(RawSymbol),
  Symbols = EnumType#avro_enum_type.symbols,
  get_index(Wanted, Symbols, 0).

%% @doc Find symbol from index.
%% `Index' is zero-based; the assertion crashes when it is not below the
%% number of symbols.
-spec get_symbol_from_index(enum_type(), index()) -> symbol().
get_symbol_from_index(EnumType, Index) when ?IS_ENUM_TYPE(EnumType) ->
  Symbols = EnumType#avro_enum_type.symbols,
  true = (Index < length(Symbols)),
  lists:nth(Index + 1, Symbols).

%% @doc Enums can be casted from other enums or strings.
-spec cast(avro_type(), symbol_raw()) -> {ok, avro_value()} | {error, term()}.
cast(EnumType, Input) when ?IS_ENUM_TYPE(EnumType) ->
  do_cast(EnumType, Input).

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% @private Reject an empty or non-unique symbol list, then verify the names.
-spec check_symbols([symbol()]) -> ok | no_return().
check_symbols(Symbols) ->
  Count = length(Symbols),
  ?ERROR_IF(Count =:= 0, empty_symbols),
  ?ERROR_IF(length(lists:usort(Symbols)) =/= Count, non_unique_symbols),
  avro_util:verify_names(Symbols).

%% @private Box the canonicalized symbol if it belongs to the enum type.
%% The error tuple carries the raw input, not the canonicalized symbol.
-spec do_cast(enum_type(), symbol_raw()) ->
        {ok, avro_value()} | {error, any()}.
do_cast(EnumType, RawSymbol) when ?IS_SYMBOL_RAW(RawSymbol) ->
  Symbol = ?SYMBOL(RawSymbol),
  case is_valid_symbol(EnumType, Symbol) of
    false -> {error, {cast_error, EnumType, RawSymbol}};
    true  -> {ok, ?AVRO_VALUE(EnumType, Symbol)}
  end.

%% @private
-spec is_valid_symbol(enum_type(), symbol()) -> boolean().
is_valid_symbol(EnumType, Symbol) ->
  Symbols = EnumType#avro_enum_type.symbols,
  lists:member(Symbol, Symbols).

%% @private
-spec get_index(symbol(), [symbol()], index()) -> index().
%% Walk the symbol list, counting positions until the head equals `Wanted'.
%% There is no clause for the empty list, so an absent symbol crashes.
get_index(Wanted, [Head | _Rest], Position) when Head =:= Wanted ->
  Position;
get_index(Wanted, [_Other | Rest], Position) ->
  get_index(Wanted, Rest, Position + 1).

%%%_* Emacs ============================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:
--------------------------------------------------------------------------------
/test/avro_union_tests.erl:
--------------------------------------------------------------------------------
%% coding: latin-1
%%%-------------------------------------------------------------------
%%% Copyright (c) 2013-2016 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License. You may obtain
%%% a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied. See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%
%%%-------------------------------------------------------------------
-module(avro_union_tests).

-include_lib("eunit/include/eunit.hrl").
-include("avro_internal.hrl").

%% Record type "R<N>" in namespace com.klarna.test with a single int field.
get_record(N) ->
  Suffix = integer_to_list(N),
  Fields = [avro_record:define_field("F", int)],
  Opts = [{namespace, "com.klarna.test"}],
  avro_record:type("R" ++ Suffix, Fields, Opts).

%% Fullname string of the record type built by get_record(N).
make_name(N) ->
  Suffix = integer_to_list(N),
  "com.klarna.test.R" ++ Suffix.

%% Union of record types R1..R5.
tiny_union() ->
  Members = lists:map(fun get_record/1, lists:seq(1, 5)),
  avro_union:type(Members).

%% Union of record types R1..R200.
big_union() ->
  Members = lists:map(fun get_record/1, lists:seq(1, 200)),
  avro_union:type(Members).
%% The same member listed twice must be rejected.
duplicated_member_test() ->
  Members = [int, null, int],
  ?assertError({<<"duplicated union member">>, <<"int">>},
               avro_union:type(Members)).

%% A union nested directly inside a union must be rejected.
union_can_not_be_a_member_of_union_test() ->
  ?assertError(<<"union should not have union as member">>,
               avro_union:type([avro_union:type([int])])).

%% new/2 and new_direct/2 must produce the same boxed value.
new_direct_test() ->
  Union = avro_union:type([int, string]),
  ViaNew = avro_union:new(Union, "Foo"),
  Member = avro_primitive:string("Foo"),
  ViaDirect = avro_union:new_direct(Union, Member),
  ?assertEqual(ViaNew, ViaDirect).

%% Members can be looked up both by fullname and by zero-based index.
lookup_type_from_tiny_union_test() ->
  Union = tiny_union(),
  Rec1 = get_record(1),
  ?assertEqual({ok, Rec1},
               avro_union:lookup_type("com.klarna.test.R1", Union)),
  ?assertEqual({ok, Rec1}, avro_union:lookup_type(0, Union)),
  Rec2 = get_record(2),
  ?assertEqual({ok, Rec2},
               avro_union:lookup_type("com.klarna.test.R2", Union)),
  ?assertEqual({ok, Rec2}, avro_union:lookup_type(1, Union)).

%% Same lookups against the 200-member union.
lookup_type_from_big_union_test() ->
  Union = big_union(),
  Rec100 = get_record(100),
  ?assertEqual({ok, Rec100},
               avro_union:lookup_type("com.klarna.test.R100", Union)),
  ?assertEqual({ok, Rec100}, avro_union:lookup_type(99, Union)).

%% Unboxing a union value yields the member's plain term.
to_term_test() ->
  Union = avro_union:type([null, int]),
  NullValue = avro_union:new(Union, null),
  IntValue = avro_union:new(Union, 1),
  ?assertEqual(null, avro:to_term(NullValue)),
  ?assertEqual(1, avro:to_term(IntValue)).

%% cast/2 returns an error tuple on mismatch while new/2 raises.
cast_test() ->
  Union = avro_union:type([null, long]),
  ?assertMatch({ok, #avro_value{}}, avro_union:cast(Union, {long, 1})),
  ?assertMatch({ok, #avro_value{}}, avro_union:cast(Union, null)),
  ?assertEqual({error, type_mismatch}, avro_union:cast(Union, "str")),
  ?assertException(error, type_mismatch, avro_union:new(Union, "str")).
%% Casting with a member index that is out of range reports unknown_member.
unknown_member_cast_test() ->
  Union = avro_union:type([null, long]),
  Expected = {error, {unknown_member, Union, 2}},
  ?assertEqual(Expected, avro_union:cast(Union, {2, "s"})).

%% Tagged input selects the member by index or name; unknown tags raise.
unknown_member_encode_test() ->
  Union = avro_union:type([null, long]),
  Tagger = fun(_MemberType, Input, MemberId) ->
               {encoded, MemberId, Input}
           end,
  ?assertEqual({encoded, 0, null},
               avro_union:encode(Union, {0, null}, Tagger)),
  ?assertEqual({encoded, 0, null},
               avro_union:encode(Union, {null, null}, Tagger)),
  ?assertEqual({encoded, 1, 42},
               avro_union:encode(Union, {long, 42}, Tagger)),
  ?assertError({unknown_member, Union, 2},
               avro_union:encode(Union, {2, "s"}, Tagger)),
  ?assertError({unknown_member, Union, "int"},
               avro_union:encode(Union, {"int", 2}, Tagger)).

%% Untagged input: the encode fun only succeeds for the right member id.
loop_over_encode_test() ->
  Union = avro_union:type([null, long]),
  Picky = fun(_MemberType, Input, MemberId) ->
              case MemberId of
                0 -> null = Input;
                1 -> 42 = Input
              end,
              {encoded, MemberId, Input}
          end,
  ?assertEqual({encoded, 0, null}, avro_union:encode(Union, null, Picky)),
  ?assertEqual({encoded, 1, 42}, avro_union:encode(Union, 42, Picky)),
  ?assertException(error, {failed_to_encode_union, Union, "s"},
                   avro_union:encode(Union, "s", Picky)).
%% A 200-member union built from type names only, addressed by fullname.
big_union_of_names_test() ->
  Names = [make_name(N) || N <- lists:seq(1, 200)],
  Union = avro_union:type(Names),
  Tagger = fun(_MemberType, Input, MemberId) ->
               {encoded, MemberId, Input}
           end,
  First = {<<"com.klarna.test.R1">>, value},
  Last = {<<"com.klarna.test.R200">>, value},
  Missing = {<<"com.klarna.test.x">>, value},
  ?assertEqual({encoded, 0, value}, avro_union:encode(Union, First, Tagger)),
  ?assertEqual({encoded, 199, value}, avro_union:encode(Union, Last, Tagger)),
  ?assertException(error, {unknown_member, Union, <<"com.klarna.test.x">>},
                   avro_union:encode(Union, Missing, Tagger)).

union_of_string_and_int_array_test() ->
  %% Make sure tagging union members will work for union of
  %% string and int/long arrays.
  StringType = avro_primitive:string_type(),
  ArrayType = avro_array:type(avro_primitive:int_type()),
  Union = avro_union:type([StringType, ArrayType]),
  %% Each fun only has a clause for the member index it expects.
  OnString = fun(_StringType, _StringValue, 0) -> string end,
  OnArray = fun(_ArrayType, _ArrayValue, 1) -> array end,
  ?assertEqual(string,
               avro_union:encode(Union, {string, "abc"}, OnString)),
  ?assertEqual(array,
               avro_union:encode(Union, {array, [$a, $b, $c]}, OnArray)),
  ?assertEqual(string,
               avro_union:encode(Union, <<"abc">>, OnString)),
  ok.

%% An empty union has no member to encode into.
no_member_test() ->
  EmptyUnion = avro_union:type([]),
  Unexpected = fun(_StringType, _StringValue, 0) -> error(unexpected) end,
  ?assertError({unknown_member, _, string},
               avro_union:encode(EmptyUnion, {string, "abc"}, Unexpected)).
162 | 163 | %%%_* Emacs ==================================================================== 164 | %%% Local Variables: 165 | %%% allout-layout: t 166 | %%% erlang-indent-level: 2 167 | %%% End: 168 | -------------------------------------------------------------------------------- /test/avro_ocf_tests.erl: -------------------------------------------------------------------------------- 1 | %% coding: latin-1 2 | %%%------------------------------------------------------------------- 3 | %%% Copyright (c) 2013-2018 Klarna AB 4 | %%% 5 | %%% This file is provided to you under the Apache License, 6 | %%% Version 2.0 (the "License"); you may not use this file 7 | %%% except in compliance with the License. You may obtain 8 | %%% a copy of the License at 9 | %%% 10 | %%% http://www.apache.org/licenses/LICENSE-2.0 11 | %%% 12 | %%% Unless required by applicable law or agreed to in writing, 13 | %%% software distributed under the License is distributed on an 14 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %%% KIND, either express or implied. See the License for the 16 | %%% specific language governing permissions and limitations 17 | %%% under the License. 18 | %%% 19 | %%%------------------------------------------------------------------- 20 | -module(avro_ocf_tests). 21 | 22 | -include("avro_internal.hrl"). 23 | -include_lib("eunit/include/eunit.hrl"). 24 | 25 | -record(header, { magic 26 | , meta 27 | , sync 28 | }). 
%% Decode the reference OCF file, write it back out, decode the copy and
%% compare header, schema and objects.
interop_test() ->
  Source = test_data("interop.ocf"),
  {Header, Schema, Objects} = avro_ocf:decode_file(Source),
  Lkup = avro:make_lkup_fun(Schema),
  Copy = test_data("interop.ocf.test"),
  {ok, Fd} = file:open(Copy, [write]),
  try
    ok = avro_ocf:write_header(Fd, Header),
    ok = avro_ocf:append_file(Fd, Header, Lkup, Schema, Objects)
  after
    file:close(Fd)
  end,
  {HeaderCopy, SchemaCopy, ObjectsCopy} = avro_ocf:decode_file(Copy),
  %% Meta is compared separately, ignoring the order of its entries.
  ?assertEqual(Header#header{meta = []}, HeaderCopy#header{meta = []}),
  ?assertEqual(lists:keysort(1, Header#header.meta),
               lists:keysort(1, HeaderCopy#header.meta)),
  ?assertEqual(Schema, SchemaCopy),
  ?assertEqual(Objects, ObjectsCopy).

decode_deflate_file_test() ->
  Path = test_data("interop_deflate.ocf"),
  {Header, _Schema, Objects} = avro_ocf:decode_file(Path),
  Meta = Header#header.meta,
  ?assertEqual(<<"deflate">>, proplists:get_value(<<"avro.codec">>, Meta)),
  ?assertEqual(<<"hey">>,
               proplists:get_value(<<"stringField">>, hd(Objects))).

decode_no_codec_file_test() ->
  Path = test_data("interop_no_codec.ocf"),
  {Header, _Schema, Objects} = avro_ocf:decode_file(Path),
  Meta = Header#header.meta,
  ?assertEqual(undefined, proplists:get_value(<<"avro.codec">>, Meta)),
  ?assertEqual(<<"hey">>,
               proplists:get_value(<<"stringField">>, hd(Objects))).

decode_snappy_file_test() ->
  Path = test_data("interop_snappy.ocf"),
  {Header, _Schema, Objects} = avro_ocf:decode_file(Path),
  Meta = Header#header.meta,
  ?assertEqual(<<"snappy">>, proplists:get_value(<<"avro.codec">>, Meta)),
  ?assertEqual(<<"hey">>,
               proplists:get_value(<<"stringField">>, hd(Objects))).
%% A snappy block with a bad checksum must make decoding raise.
decode_snappy_file_invalid_checksum_test() ->
  Path = test_data("interop_snappy_invalid_checksum.ocf"),
  ?assertException(error, {invalid_checksum, _},
                   avro_ocf:decode_file(Path)).

%% Write one record without a codec and read it back.
write_file_test() ->
  Path = test_data("my.ocf.test"),
  Store = undefined, %% should not require lookup
  F1 = avro_record:define_field("f1", int, []),
  F2 = avro_record:define_field("f2", string, []),
  Type = avro_record:type("rec", [F1, F2], [{namespace, "my.ocf.test"}]),
  Record = [{"f1", 1}, {"f2", "foo"}],
  ok = avro_ocf:write_file(Path, Store, Type, [Record]),
  %% Type is already bound: the match also asserts the decoded schema.
  {_Header, Type, Decoded} = avro_ocf:decode_file(Path),
  ?assertEqual([[{<<"f1">>, 1}, {<<"f2">>, <<"foo">>}]], Decoded).

%% Same round trip with the deflate codec requested through the meta.
write_deflate_file_test() ->
  Path = test_data("deflate.ocf.test"),
  Store = undefined, %% should not require lookup
  F1 = avro_record:define_field("f1", int, []),
  F2 = avro_record:define_field("f2", string, []),
  Type = avro_record:type("rec", [F1, F2],
                          [{namespace, "deflate.ocf.test"}]),
  Record = [{"f1", 1}, {"f2", "foo"}],
  Meta = [{<<"avro.codec">>, <<"deflate">>}],
  ok = avro_ocf:write_file(Path, Store, Type, [Record], Meta),
  %% Type is already bound: the match also asserts the decoded schema.
  {_Header, Type, Decoded} = avro_ocf:decode_file(Path),
  ?assertEqual([[{<<"f1">>, 1}, {<<"f2">>, <<"foo">>}]], Decoded).
%% Round trip of one record with the snappy codec requested through the meta.
write_snappy_file_test() ->
  Path = test_data("snappy.ocf.test"),
  Store = undefined, %% should not require lookup
  F1 = avro_record:define_field("f1", int, []),
  F2 = avro_record:define_field("f2", string, []),
  Type = avro_record:type("rec", [F1, F2],
                          [{namespace, "snappy.ocf.test"}]),
  Record = [{"f1", 1}, {"f2", "foo"}],
  Meta = [{<<"avro.codec">>, <<"snappy">>}],
  ok = avro_ocf:write_file(Path, Store, Type, [Record], Meta),
  %% Type is already bound: the match also asserts the decoded schema.
  {_Header, Type, Decoded} = avro_ocf:decode_file(Path),
  ?assertEqual([[{<<"f1">>, 1}, {<<"f2">>, <<"foo">>}]], Decoded).

%% The file schema is a union of a record and int; one object of each.
root_level_union_test() ->
  Path = test_data("union.ocf.test"),
  Store = undefined, %% should not require lookup
  F1 = avro_record:define_field("f1", int, []),
  F2 = avro_record:define_field("f2", string, []),
  RecordType = avro_record:type("rec", [F1, F2],
                                [{namespace, "my.ocf.test"}]),
  IntType = avro_primitive:type(int, []),
  Union = avro_union:type([RecordType, IntType]),
  RecordObj = [{"f1", 1}, {"f2", "foo"}],
  IntObj = 42,
  ok = avro_ocf:write_file(Path, Store, Union, [RecordObj, IntObj],
                           [{"my-meta", <<0>>}]),
  {_Header, DecodedType, Decoded} = avro_ocf:decode_file(Path),
  ?assertEqual(Union, DecodedType),
  ?assertEqual([[{<<"f1">>, 1}, {<<"f2">>, <<"foo">>}], 42], Decoded).

%% Header meta validation: rejected keys, values and codecs first, then
%% three calls that are only expected not to raise.
meta_test() ->
  ?assertError({reserved_meta_key, "avro.x"},
               avro_ocf:make_header(ignore, [{"avro.x", ignore}])),
  ?assertError({bad_meta_value, atom},
               avro_ocf:make_header(ignore, [{"x", atom}])),
  ?assertError({bad_codec, <<"lzw">>},
               avro_ocf:make_header(ignore, [{"avro.codec", <<"lzw">>}])),
  _ = avro_ocf:make_header(<<"long">>),
  _ = avro_ocf:make_header(<<"int">>, [{"a", <<"b">>}]),
  _ = avro_ocf:make_header(<<"int">>, [{<<"avro.codec">>, <<"null">>}]),
  ok.
%% Build an OCF binary in memory from 1000 encoded longs and decode it back.
make_ocf_test() ->
  Numbers = lists:seq(1, 1000),
  LongType = avro_primitive:type(long, []),
  Header = avro_ocf:make_header(LongType),
  Encode = avro:make_simple_encoder(LongType, []),
  Encoded = lists:map(Encode, Numbers),
  Bin = iolist_to_binary(avro_ocf:make_ocf(Header, Encoded)),
  {_, _, Decoded} = avro_ocf:decode_binary(Bin),
  ?assertEqual(Numbers, Decoded).

%% A decoder hook replaces every null with <<"modifiedNull">> and lets the
%% default decoder handle everything else.
decoder_hook_test() ->
  F1 = avro_record:define_field("f1", int, []),
  F2 = avro_record:define_field("f2", null, []),
  Type = avro_record:type("rec", [F1, F2], [{namespace, "my.ocf.test"}]),
  Header = avro_ocf:make_header(Type),
  Encode = avro:make_simple_encoder(Type, []),
  Record = [{"f1", 1}, {"f2", null}],
  Encoded = [Encode(Record)],
  Bin = iolist_to_binary(avro_ocf:make_ocf(Header, Encoded)),
  Hook = fun(T, _, Data, DecodeFun) ->
             TypeName = avro:get_type_name(T),
             case TypeName of
               <<"null">> -> {<<"modifiedNull">>, Data};
               _          -> DecodeFun(Data)
             end
         end,
  Options = avro:make_decoder_options([{hook, Hook}]),
  {_, _, Decoded} = avro_ocf:decode_binary(Bin, Options),
  ?assertEqual([[{<<"f1">>, 1}, {<<"f2">>, <<"modifiedNull">>}]], Decoded).

%% Path of a file under test/data of the erlavro application directory.
test_data(FileName) ->
  AppDir = code:lib_dir(erlavro),
  filename:join([AppDir, "test", "data", FileName]).
%%%_* Emacs ====================================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:
--------------------------------------------------------------------------------
/test/avro_record_tests.erl:
--------------------------------------------------------------------------------
%% coding: latin-1
%%%-------------------------------------------------------------------
%%% Copyright (c) 2013-2018 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License. You may obtain
%%% a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied. See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%
%%%-------------------------------------------------------------------
-module(avro_record_tests).

-import(avro_record, [ new/2
                     , type/2
                     , type/3
                     , define_field/2
                     , define_field/3
                     , get_field_def/2
                     , to_list/1
                     , set_value/3
                     , set_values/2
                     , update/3
                     , get_field_type/2
                     , get_value/2
                     , cast_value/2
                     , cast/2
                     ]).

-include_lib("eunit/include/eunit.hrl").
-include("avro_internal.hrl").

%% The fullname joins namespace and name; the field definition can be
%% fetched back by field name.
type_test() ->
  InvnoField = define_field("invno", long),
  RecordType = type("Test", [InvnoField], [{namespace, "name.space"}]),
  ?assertEqual(<<"name.space.Test">>, avro:get_type_fullname(RecordType)),
  ?assertEqual({ok, InvnoField}, get_field_def("invno", RecordType)).
%% Field definitions are found by name or alias; unknown names give false.
get_field_def_test() ->
  F1 = define_field(f1, long),
  F2 = define_field("f2", long, [{aliases, [a, b]}]),
  F3 = define_field("f3", long),
  RecordType = type("Test", [F1, F2, F3]),
  ?assertEqual(false, get_field_def("f4", RecordType)),
  ?assertEqual({ok, F2}, get_field_def("f2", RecordType)),
  ?assertEqual({ok, F3}, get_field_def("f3", RecordType)),
  ?assertEqual({ok, F2}, get_field_def(b, RecordType)).

get_field_type_test() ->
  InvnoField = define_field("invno", long),
  RecordType = type("Test", [InvnoField], [{namespace, "name.space"}]),
  ?assertEqual(avro_primitive:long_type(),
               get_field_type("invno", RecordType)).

%% A record created without values takes the field's declared default.
default_fields_test() ->
  Default = avro_primitive:long(10),
  InvnoField = define_field("invno", long, [{default, Default}]),
  RecordType = type("Test", [InvnoField], [{namespace, "name.space"}]),
  Record = new(RecordType, []),
  ?assertEqual(avro_primitive:long(10), get_value("invno", Record)),
  ?assertException(error, {unknown_field, <<"no_such_field">>},
                   get_value("no_such_field", Record)).

%% set_value/3 and set_values/2 update fields; bad values and unknown
%% fields raise.
get_set_test() ->
  Fields = [ define_field("invno", long)
           , define_field("uname", string)
           ],
  RecordType = type("Test", Fields, [{namespace, "name.space"}]),
  Initial = new(RecordType, [{"invno", 0}, {"uname", "some-name"}]),
  AfterSet = set_value("invno", avro_primitive:long(1), Initial),
  ?assertEqual(avro_primitive:long(1), get_value("invno", AfterSet)),
  AfterSetMany = set_values([{"invno", 2}, {"uname", "new-name"}], AfterSet),
  ?assertEqual(avro_primitive:long(2), get_value("invno", AfterSetMany)),
  ?assertEqual(avro_primitive:string("new-name"),
               get_value("uname", AfterSetMany)),
  ?assertException(error, {<<"invno">>, _},
                   set_value("invno", "string", AfterSetMany)),
  ?assertException(error, {unknown_field, <<"x">>},
                   set_value("x", "y", AfterSetMany)).
%% update/3 replaces a field with the result of applying a fun to it.
update_test() ->
  RecordType = type("Test", [define_field("invno", long)],
                    [{namespace, "name.space"}]),
  Before = new(RecordType, [{"invno", 10}]),
  Double = fun(Boxed) ->
               avro_primitive:long(avro_primitive:get_value(Boxed) * 2)
           end,
  After = update("invno", Double, Before),
  ?assertEqual(avro_primitive:long(20), get_value("invno", After)).

%% to_list/1 keeps the field values boxed and keys them by binary name.
to_list_test() ->
  Fields = [ define_field("invno", long)
           , define_field("name", string)
           ],
  RecordType = type("Test", Fields, [{namespace, "name.space"}]),
  Record = new(RecordType, [ {"invno", avro_primitive:long(1)}
                           , {"name", avro_primitive:string("some name")}
                           ]),
  Pairs = to_list(Record),
  ?assertEqual(2, length(Pairs)),
  ?assertEqual({<<"invno">>, avro_primitive:long(1)},
               lists:keyfind(<<"invno">>, 1, Pairs)),
  ?assertEqual({<<"name">>, avro_primitive:string("some name")},
               lists:keyfind(<<"name">>, 1, Pairs)).

%% avro:to_term/1 unboxes the field values as well.
to_term_test() ->
  Fields = [ define_field(invno, long)
           , define_field("name", string)
           ],
  RecordType = type("Test", Fields, [{namespace, "name.space"}]),
  Record = new(RecordType, [ {"invno", avro_primitive:long(1)}
                           , {"name", avro_primitive:string("some name")}
                           ]),
  Pairs = avro:to_term(Record),
  ?assertEqual(2, length(Pairs)),
  ?assertEqual({<<"invno">>, 1}, lists:keyfind(<<"invno">>, 1, Pairs)),
  ?assertEqual({<<"name">>, <<"some name">>},
               lists:keyfind(<<"name">>, 1, Pairs)).

%% cast/2 accepts the field values either as a proplist or as a map.
cast_test() ->
  RecType = type("Record",
                 [define_field("a", string), define_field("b", int)],
                 [{namespace, "name.space"}]),
  AssertCast =
    fun(Input) ->
        {ok, Rec} = cast(RecType, Input),
        ?assertEqual(avro_primitive:string("foo"), get_value("a", Rec)),
        ?assertEqual(avro_primitive:int(1), get_value("b", Rec))
    end,
  lists:foreach(AssertCast, [ [{"b", 1}, {"a", "foo"}]
                            , #{"b" => 1, "a" => "foo"}
                            ]).

%% A value that does not fit the field type makes cast/2 return an error
%% tagged with the field name.
cast_error_test() ->
  RecType = type("Record",
                 [define_field("a", long)],
                 [{namespace, "name.space"}]),
  Result = cast(RecType, [{"a", "foo"}]),
  ?assertMatch({error, {<<"a">>, _}}, Result).

%% Input keys may be field aliases instead of field names.
cast_by_aliases_test() ->
  FieldA = define_field("a", string, [{aliases, ["al1", "al2"]}]),
  FieldB = define_field("b", int, [{aliases, ["al3", "al4"]}]),
  RecType = type("Record", [FieldA, FieldB], [{namespace, "name.space"}]),
  {ok, Rec} = cast(RecType, [{"al4", 1}, {"al1", "foo"}]),
  ?assertEqual(avro_primitive:string("foo"), get_value("a", Rec)),
  ?assertEqual(avro_primitive:int(1), get_value("b", Rec)).

%% avro_record:encode/3 calls the encode fun once per field, in schema order,
%% and raises an encode error when a field has no input value.
encode_test() ->
  RecType = type("Test",
                 [define_field("field1", long), define_field("field2", string)],
                 [{namespace, "name.space"}]),
  Tag = fun(FieldName, _FieldType, Input) ->
            {FieldName, {encoded, Input}}
        end,
  ?assertError(?ENC_ERR(required_field_missed,
                        [ {record, <<"name.space.Test">>}
                        , {field, <<"field2">>}
                        ]),
               avro_record:encode(RecType, [{<<"field1">>, 1}], Tag)),
  Encoded = avro_record:encode(RecType, [{"field2", foo}, {"field1", 1}], Tag),
  ?assertEqual([ {<<"field1">>, {encoded, 1}}
               , {<<"field2">>, {encoded, foo}}
               ],
               Encoded).

%%%_* Emacs ====================================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:

--------------------------------------------------------------------------------
/include/avro_internal.hrl:
--------------------------------------------------------------------------------
%%%-----------------------------------------------------------------------------
%%% Copyright (c) 2013-2018 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License.  You may obtain
%%% a copy of the License at
%%%
%%%   http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied.  See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%-----------------------------------------------------------------------------

%% Include guard: the body of this header is processed at most once per
%% compilation unit.
-ifndef(AVRO_INTERNAL_HRL).
-define(AVRO_INTERNAL_HRL, true).

-include("erlavro.hrl").

%% Smallest and largest values of a 32-bit signed integer (-2^31, 2^31 - 1).
-define(INT4_MIN, -2147483648).
-define(INT4_MAX,  2147483647).

%% Smallest and largest values of a 64-bit signed integer (-2^63, 2^63 - 1).
-define(INT8_MIN, -9223372036854775808).
-define(INT8_MAX,  9223372036854775807).

%% Short aliases for the ?AVRO_-prefixed macros, which are not defined in this
%% header (presumably they come from erlavro.hrl, included above).
-define(NS_GLOBAL, ?AVRO_NS_GLOBAL).
-define(NO_DOC, ?AVRO_NO_DOC).

%% Sort order of a record field.
-type ordering() :: ascending | descending | ignore.

%% Marker for "no value given".
-define(NO_VALUE, undefined).

%% Definition of one field of an Avro record type.
%% `name' and `type' default to ?AVRO_REQUIRED. `default' has no initial
%% value, so it starts out as ?NO_VALUE (undefined).
%% `doc' defaults to ?NO_DOC, the macro this header defines for "no
%% documentation"; it previously defaulted to ?NS_GLOBAL, the namespace
%% constant.
-record(avro_record_field,
        { name    = ?AVRO_REQUIRED :: name()
        , doc     = ?NO_DOC        :: typedoc()
        , type    = ?AVRO_REQUIRED :: type_or_name()
        , default                  :: ?NO_VALUE | avro:in() | avro_value()
        , order   = ascending      :: ordering()
        , aliases = []             :: [name()]
        }).

-type record_field() :: #avro_record_field{}.

%% One type alias per Avro type record.
-type primitive_type() :: #avro_primitive_type{}.
-type array_type() :: #avro_array_type{}.
-type enum_type() :: #avro_enum_type{}.
-type fixed_type() :: #avro_fixed_type{}.
-type map_type() :: #avro_map_type{}.
-type record_type() :: #avro_record_type{}.
-type union_type() :: #avro_union_type{}.

-type avro_type() :: primitive_type()
                   | array_type()
                   | enum_type()
                   | fixed_type()
                   | map_type()
                   | record_type()
                   | union_type().

%% Either a type definition or the (raw) name of a type.
-type type_or_name() :: avro_type() | name_raw().

%% Custom (non-Avro) properties attached to a type.
-type custom_prop_name() :: binary().
-type custom_prop_value() :: avro_json_compat:json_value().
-type custom_prop() :: {custom_prop_name(), custom_prop_value()}.

-define(ASSIGNED_NAME, <<"_erlavro_assigned">>).

%% Canonicalize a raw name / convert a doc string to a UTF-8 binary.
-define(NAME(X), avro_util:canonicalize_name(X)).
-define(DOC(X), unicode:characters_to_binary(X, utf8)).

%% Guard-safe checks for raw (atom, string or binary) and canonical names.
-define(IS_NAME_RAW(N), (is_atom(N) orelse is_list(N) orelse is_binary(N))).
-define(IS_NAME(N), is_binary(N)).

-type name_raw() :: atom() | string() | binary().
-type namespace_raw() :: atom() | string() | binary().
-type enum_symbol_raw() :: atom() | string() | binary().

%% name, namespace and enum symbols are
%% canonicalized to binary() internally
-type name() :: binary().
-type namespace() :: binary().
-type enum_symbol() :: binary().
-type fullname() :: binary().
-type typedoc() :: string() | binary().
-type enum_index() :: non_neg_integer().
-type union_index() :: non_neg_integer().
%% Resolves a full type name to its type definition.
-type lkup_fun() :: fun((fullname()) -> avro_type()).
-type schema_store() :: avro_schema_store:store().
-type canonicalized_value() :: null | boolean() | integer() | float() | binary().

%% Properties accepted when defining a type.
-type type_prop_name() :: namespace | doc | aliases | custom_prop_name().
-type type_prop_value() :: namespace()
                         | typedoc()
                         | [name()]
                         | custom_prop_value().
-type type_props() :: [{type_prop_name(), type_prop_value()}].

-type avro_json() :: avro_json_compat:json_value().
-type avro_binary() :: iolist().

-type avro_codec() :: null | deflate | snappy.

%% Type checks
%%
%% All of these are usable in guards. The compound checks are wrapped in
%% parentheses so that the expansion stays a single operand when the macro is
%% combined with other operators: without them `not ?IS_NULL_TYPE(T)' would
%% negate only the is_record/2 test.

-define(IS_PRIMITIVE_TYPE(Type), is_record(Type, avro_primitive_type)).

-define(IS_NULL_TYPE(Type),
        (?IS_PRIMITIVE_TYPE(Type) andalso
         Type#avro_primitive_type.name =:= ?AVRO_NULL)).

-define(IS_BOOLEAN_TYPE(Type),
        (?IS_PRIMITIVE_TYPE(Type) andalso
         Type#avro_primitive_type.name =:= ?AVRO_BOOLEAN)).

-define(IS_INT_TYPE(Type),
        (?IS_PRIMITIVE_TYPE(Type) andalso
         Type#avro_primitive_type.name =:= ?AVRO_INT)).

-define(IS_LONG_TYPE(Type),
        (?IS_PRIMITIVE_TYPE(Type) andalso
         Type#avro_primitive_type.name =:= ?AVRO_LONG)).

-define(IS_FLOAT_TYPE(Type),
        (?IS_PRIMITIVE_TYPE(Type) andalso
         Type#avro_primitive_type.name =:= ?AVRO_FLOAT)).

-define(IS_DOUBLE_TYPE(Type),
        (?IS_PRIMITIVE_TYPE(Type) andalso
         Type#avro_primitive_type.name =:= ?AVRO_DOUBLE)).

-define(IS_BYTES_TYPE(Type),
        (?IS_PRIMITIVE_TYPE(Type) andalso
         Type#avro_primitive_type.name =:= ?AVRO_BYTES)).

-define(IS_STRING_TYPE(Type),
        (?IS_PRIMITIVE_TYPE(Type) andalso
         Type#avro_primitive_type.name =:= ?AVRO_STRING)).

-define(IS_RECORD_TYPE(Type), is_record(Type, avro_record_type)).
-define(IS_ENUM_TYPE(Type),   is_record(Type, avro_enum_type)).
-define(IS_ARRAY_TYPE(Type),  is_record(Type, avro_array_type)).
-define(IS_MAP_TYPE(Type),    is_record(Type, avro_map_type)).
-define(IS_UNION_TYPE(Type),  is_record(Type, avro_union_type)).
-define(IS_FIXED_TYPE(Type),  is_record(Type, avro_fixed_type)).

%% Loose check: true for any tuple, it does not look at the record tag.
-define(IS_TYPE_RECORD(Type), is_tuple(Type)).

%% Value checks: apply the matching type check to the type extracted from the
%% value by ?AVRO_VALUE_TYPE (a macro that is not defined in this header).
-define(IS_PRIMITIVE_VALUE(Value), ?IS_PRIMITIVE_TYPE(?AVRO_VALUE_TYPE(Value))).
-define(IS_RECORD_VALUE(Value),    ?IS_RECORD_TYPE(?AVRO_VALUE_TYPE(Value))).
-define(IS_ENUM_VALUE(Value),      ?IS_ENUM_TYPE(?AVRO_VALUE_TYPE(Value))).
-define(IS_ARRAY_VALUE(Value),     ?IS_ARRAY_TYPE(?AVRO_VALUE_TYPE(Value))).
-define(IS_MAP_VALUE(Value),       ?IS_MAP_TYPE(?AVRO_VALUE_TYPE(Value))).
-define(IS_UNION_VALUE(Value),     ?IS_UNION_TYPE(?AVRO_VALUE_TYPE(Value))).
-define(IS_FIXED_VALUE(Value),     ?IS_FIXED_TYPE(?AVRO_VALUE_TYPE(Value))).

%% Hook called by the decoders. It receives the type being decoded, a name or
%% an integer (NOTE(review): presumably a field name or a union/array index --
%% confirm against the decoders), the input, and the fun that performs the
%% actual decoding of that input.
-type decoder_hook_fun() ::
        fun((avro_type(), name() | integer(), avro:in(),
            fun((avro:in()) -> avro:out())) -> avro:out()).

%% By default, the hook fun does nothing else but calling the decode function.
-define(DEFAULT_DECODER_HOOK, ?AVRO_DEFAULT_DECODER_HOOK).

%% Decoder options; every key is mandatory (`:=').
-type decoder_options() :: #{ encoding := avro_binary | avro_json
                            , map_type := proplist | map
                            , record_type := proplist | map
                            , is_wrapped := boolean()
                            , hook := decoder_hook_fun()
                            }.

%% Throw an exception in case the value is already encoded.
%% Already encoded values are recognized by their {json, _} / {binary, _}
%% shape; anything else evaluates to ok.
-define(ASSERT_AVRO_VALUE(VALUE),
        case VALUE of
          {json, _}   -> erlang:throw({value_already_encoded, VALUE});
          {binary, _} -> erlang:throw({value_already_encoded, VALUE});
          _           -> ok
        end).

%% Raise Err (class error) when Cond is true, evaluate to ok when it is false.
%% Cond must evaluate to a boolean, any other value fails with case_clause.
-define(ERROR_IF(Cond, Err),
        case Cond of
          true  -> erlang:error(Err);
          false -> ok
        end).

%% Same as ?ERROR_IF with the condition negated.
-define(ERROR_IF_NOT(Cond, Err), ?ERROR_IF(not (Cond), Err)).

%% Shape of the error term raised when encoding fails.
%% Context is a list (contexts are concatenated with ++ by ?RAISE_ENC_ERR).
-define(ENC_ERR(Reason, Context),
        {'$avro_encode_error', Reason, Context}).

%% Re-raise an exception caught while encoding as an ?ENC_ERR term.
%% If the caught reason is already an ?ENC_ERR, THIS_CONTEXT is prepended to
%% its context and the original reason is kept; otherwise the caught reason is
%% wrapped with THIS_CONTEXT as the whole context.
%% NOTE: the expansion binds the variables Reason and Context (plus ReasonX
%% and ContextX when the first case clause matches) in the scope of the
%% caller, so they must not already be bound to other values there.
-define(RAISE_ENC_ERR(EXCEPTION_CLASS, EXCEPTION_REASON, THIS_CONTEXT, STACK),
        begin
          {Reason, Context} =
            case EXCEPTION_REASON of
              ?ENC_ERR(ReasonX, ContextX) ->
                {ReasonX, THIS_CONTEXT ++ ContextX};
              _ ->
                {EXCEPTION_REASON, THIS_CONTEXT}
            end,
          erlang:raise(EXCEPTION_CLASS, ?ENC_ERR(Reason, Context), STACK)
        end).
-endif.

%%%_* Emacs ====================================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:

--------------------------------------------------------------------------------
/test/avro_json_encoder_canon_tests.erl:
--------------------------------------------------------------------------------
%% coding: latin-1
%%%-------------------------------------------------------------------
%%% Copyright (c) 2018 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License.  You may obtain
%%% a copy of the License at
%%%
%%%   http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied.  See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%-------------------------------------------------------------------
-module(avro_json_encoder_canon_tests).

-include_lib("eunit/include/eunit.hrl").

%% Decode a JSON schema and encode it again with the `canon' option enabled.
canon(Json) ->
  Schema = avro_json_decoder:decode_schema(Json),
  avro:encode_schema(Schema, [{canon, true}]).

%% Two JSON schemas are considered equal when they decode to the same schema.
%% Comparing decoded schemas instead of JSON text avoids depending on the
%% order of object keys, which differs between jsone and the native json
%% module.
canon_equal(JsonA, JsonB) ->
  avro_json_decoder:decode_schema(JsonA) =:=
    avro_json_decoder:decode_schema(JsonB).

% Run Java test cases from the Avro project:
% https://github.com/apache/avro/blob/master/share/test/data/schema-tests.txt

java_primitive_test() ->
  %% {ExpectedCanonicalForm, InputSchema}, numbered as in schema-tests.txt.
  %% Case 016 (<<"[ ]">> expected to give <<"[]">>) is left out because
  %% avro_json_decoder:decode_schema/1 considers an empty union invalid.
  Cases =
    [ {<<"\"null\"">>,    <<"\"null\"">>}                % 000
    , {<<"\"null\"">>,    <<"{\"type\":\"null\"}">>}     % 001
    , {<<"\"boolean\"">>, <<"\"boolean\"">>}             % 002
    , {<<"\"boolean\"">>, <<"{\"type\":\"boolean\"}">>}  % 003
    , {<<"\"int\"">>,     <<"\"int\"">>}                 % 004
    , {<<"\"int\"">>,     <<"{\"type\":\"int\"}">>}      % 005
    , {<<"\"long\"">>,    <<"\"long\"">>}                % 006
    , {<<"\"long\"">>,    <<"{\"type\":\"long\"}">>}     % 007
    , {<<"\"float\"">>,   <<"\"float\"">>}               % 008
    , {<<"\"float\"">>,   <<"{\"type\":\"float\"}">>}    % 009
    , {<<"\"double\"">>,  <<"\"double\"">>}              % 010
    , {<<"\"double\"">>,  <<"{\"type\":\"double\"}">>}   % 011
    , {<<"\"bytes\"">>,   <<"\"bytes\"">>}               % 012
    , {<<"\"bytes\"">>,   <<"{\"type\":\"bytes\"}">>}    % 013
    , {<<"\"string\"">>,  <<"\"string\"">>}              % 014
    , {<<"\"string\"">>,  <<"{\"type\":\"string\"}">>}   % 015
    , {<<"[\"int\"]">>,   <<"[ \"int\" ]">>}             % 017
    , {<<"[\"int\",\"boolean\"]">>,
       <<"[ \"int\" , {\"type\":\"boolean\"} ]">>}       % 018
    ],
  lists:foreach(
    fun({Expected, Input}) ->
        ?assertEqual(Expected, canon(Input))
    end,
    Cases).
% 018

% Put fields in standard order, without whitespace
java_019_test() ->
  Input = <<"{\"fields\":[], "
            "\"type\":\"record\", \"name\":\"foo\"}">>,
  Canonical = <<"{\"name\":\"foo\",\"type\":\"record\",\"fields\":[]}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% A separate namespace is folded into the full name
java_020_test() ->
  Input = <<"{\"fields\":[], \"type\":\"record\", \"name\":\"foo\", "
            "\"namespace\":\"x.y\"}">>,
  Canonical = <<"{\"name\":\"x.y.foo\",\"type\":\"record\",\"fields\":[]}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

java_021_test() ->
  % https://avro.apache.org/docs/1.8.2/spec.html#names
  %
  % "A fullname is specified. If the name specified contains a dot, then it is
  % assumed to be a fullname, and any namespace also specified is ignored. For
  % example, use "name": "org.foo.X" to indicate the fullname org.foo.X."
  Input = <<"{\"fields\":[], \"type\":\"record\", "
            "\"name\":\"a.b.foo\", \"namespace\":\"x.y\"}">>,
  Canonical = <<"{\"name\":\"a.b.foo\",\"type\":\"record\",\"fields\":[]}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% The doc attribute is not part of the canonical form
java_022_test() ->
  Input = <<"{\"fields\":[], \"type\":\"record\", "
            "\"name\":\"foo\", \"doc\":\"Useful info\"}">>,
  Canonical = <<"{\"name\":\"foo\",\"type\":\"record\",\"fields\":[]}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% Aliases are not part of the canonical form
java_023_test() ->
  Input = <<"{\"fields\":[], \"type\":\"record\", "
            "\"name\":\"foo\", \"aliases\":[\"foo\",\"bar\"]}">>,
  Canonical = <<"{\"name\":\"foo\",\"type\":\"record\",\"fields\":[]}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% Both doc and aliases given
java_024_test() ->
  Input = <<"{\"fields\":[], \"type\":\"record\", "
            "\"name\":\"foo\", \"doc\":\"foo\", "
            "\"aliases\":[\"foo\",\"bar\"]}">>,
  Canonical = <<"{\"name\":\"foo\",\"type\":\"record\",\"fields\":[]}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% A primitive field type written as an object is reduced to its name
java_025_test() ->
  Input = <<"{\"fields\":[{\"type\":{\"type\":\"boolean\"}, "
            "\"name\":\"f1\"}], \"type\":\"record\", "
            "\"name\":\"foo\"}">>,
  Canonical = <<"{\"name\":\"foo\",\"type\":\"record\",\"fields\":["
                "{\"name\":\"f1\",\"type\":\"boolean\"}]}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% Field attributes (aliases, default, order, doc) and line breaks in the input
java_026_test() ->
  Input = <<"{ \"fields\":[{\"type\":\"boolean\", \"aliases\":[], "
            "\"name\":\"f1\", \"default\":true},\n"
            " {\"order\":\"descending\",\"name\":\"f2\",\"doc\":\"Hello\","
            "\"type\":\"int\"}],\n"
            " \"type\":\"record\", \"name\":\"foo\"\n"
            "}\n">>,
  Canonical = <<"{\"name\":\"foo\",\"type\":\"record\",\"fields\":["
                "{\"name\":\"f1\",\"type\":\"boolean\"},"
                "{\"name\":\"f2\",\"type\":\"int\"}]}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% Enum with a single symbol
java_027_test() ->
  Input = <<"{\"type\":\"enum\", \"name\":\"foo\", "
            "\"symbols\":[\"A1\"]}">>,
  Canonical = <<"{\"name\":\"foo\",\"type\":\"enum\","
                "\"symbols\":[\"A1\"]}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% Enum with namespace and doc
java_028_test() ->
  Input = <<"{\"namespace\":\"x.y.z\", \"type\":\"enum\", "
            "\"name\":\"foo\", \"doc\":\"foo bar\", "
            "\"symbols\":[\"A1\", \"A2\"]}">>,
  Canonical = <<"{\"name\":\"x.y.z.foo\",\"type\":\"enum\","
                "\"symbols\":[\"A1\",\"A2\"]}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% Fixed type, input identical to the expected form
java_029_test() ->
  Input = <<"{\"name\":\"foo\",\"type\":\"fixed\",\"size\":15}">>,
  Canonical = <<"{\"name\":\"foo\",\"type\":\"fixed\",\"size\":15}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% Fixed type with namespace and doc
java_030_test() ->
  Input = <<"{\"namespace\":\"x.y.z\", \"type\":\"fixed\", "
            "\"name\":\"foo\", \"doc\":\"foo bar\", \"size\":32}">>,
  Canonical = <<"{\"name\":\"x.y.z.foo\",\"type\":\"fixed\",\"size\":32}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% Array of a primitive type written as an object
java_031_test() ->
  Input = <<"{ \"items\":{\"type\":\"null\"}, "
            "\"type\":\"array\"}">>,
  Canonical = <<"{\"type\":\"array\",\"items\":\"null\"}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% Map of strings
java_032_test() ->
  Input = <<"{ \"values\":\"string\", \"type\":\"map\"}">>,
  Canonical = <<"{\"type\":\"map\",\"values\":\"string\"}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

% Record that refers to itself from a union
java_033_test() ->
  Input = <<" {\"name\":\"PigValue\",\"type\":\"record\",\n"
            " \"fields\":[{\"name\":\"value\", \"type\":[\"null\", "
            "\"int\", \"long\", \"PigValue\"]}]}\n">>,
  Canonical = <<"{\"name\":\"PigValue\",\"type\":\"record\",\"fields\":["
                "{\"name\":\"value\",\"type\":[\"null\",\"int\",\"long\","
                "\"PigValue\"]}]}">>,
  ?assert(canon_equal(Canonical, canon(Input))).

%%%_* Emacs ====================================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:

--------------------------------------------------------------------------------
/src/avro_json_compat.erl:
--------------------------------------------------------------------------------
%%%-----------------------------------------------------------------------------
%%% Copyright (c) 2025 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License.
%%% You may obtain
%%% a copy of the License at
%%%
%%%   http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied.  See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%
%%% @doc Compatibility layer for JSON encoding/decoding.
%%% Uses Erlang's native json module on OTP 27+ and
%%% falls back to jsone on older versions.
%%% The provider in use is kept in a persistent term and can be set
%%% explicitly with set_provider/1.
%%% @end
%%%-----------------------------------------------------------------------------

-module(avro_json_compat).

%% Suppress Dialyzer warnings about json module functions on OTP < 27
%% (only the listed functions are covered).
-if(?OTP_RELEASE < 27).
-dialyzer({nowarn_function, [encode/2, decode/2, encoder/2]}).
-endif.

-export([set_provider/1, get_provider/0]).
-export([encode/1, encode/2, decode/1, decode/2]).
-export([inline/1]).
-export_type([json_value/0]).

%% Type definitions compatible with both jsone and native json
%% An object is a map, a plain key-value list, or a key-value list wrapped in
%% a one-element tuple.
-type json_obj() :: #{binary() => json_value()} |
                    [{binary(), json_value()}] |
                    {[{binary(), json_value()}]}.

%% NB: atom() only for encoder inputs.
%% No atom created when decode.
-type string_value() :: atom() | binary().

-type json_value() :: null
                    | boolean()
                    | integer()
                    | float()
                    | string_value()
                    | [json_value()]
                    | json_obj().

%% Encode options are handed to the provider module as is; the native `json'
%% branch of encode/2 does not use them.
-type encode_option() :: native_utf8.
%% Shape in which decoded JSON objects are returned.
-type decode_option() :: {object_format, tuple | map | proplist}.

%% Suppress warnings about json module functions on OTP < 27
-if(?OTP_RELEASE < 27).
-compile(nowarn_undefined_function).
-endif.

%% persistent_term key under which the selected provider module is stored.
-define(PROVIDER_PTP_KEY, erlavro_json_provider).

%%%_* APIs =====================================================================

%% @doc Select the module (`json' or `jsone') used for JSON encoding and
%% decoding. The choice is stored in a persistent term.
-spec set_provider(json | jsone) -> ok.
set_provider(Provider) ->
  persistent_term:put(?PROVIDER_PTP_KEY, Provider).

%% @doc Return the JSON provider in use.
%% When none has been stored yet, the first available one of `json' and
%% `jsone' is picked and remembered for later calls.
-spec get_provider() -> json | jsone.
get_provider() ->
  case persistent_term:get(?PROVIDER_PTP_KEY, undefined) of
    undefined -> detect_provider();
    Provider -> Provider
  end.

%% Pick the first available provider and store it as the current one.
detect_provider() ->
  Provider = get_available_provider([json, jsone]),
  persistent_term:put(?PROVIDER_PTP_KEY, Provider),
  Provider.

%% @doc Wrap an already encoded JSON value so that it can be embedded
%% in a term to encode. The `{{json, JSON}}' shape is compatible with jsone.
inline(Encoded) -> {{json, Encoded}}.

%% @doc Encode an Erlang term to JSON without options.
%% Equivalent to jsone:encode/1
%% Returns iodata() (binary() or iolist())
-spec encode(json_value()) -> iodata().
encode(Term) ->
  NoOptions = [],
  encode(Term, NoOptions).

%% @doc Encode an Erlang term to JSON with options.
%% With the native `json' provider the options are not used and the result
%% is a binary; any other provider gets both arguments passed through.
-spec encode(json_value(), [encode_option()]) -> iodata().
encode(Term, Options) ->
  Provider = get_provider(),
  if
    Provider =:= json ->
      iolist_to_binary(json:encode(Term, fun encoder/2));
    true ->
      apply(Provider, encode, [Term, Options])
  end.

%% Encoder callback given to json:encode/2.
%% Translates the jsone-style terms used by this library while encoding:
%%  - a list whose first element is a pair with a binary or atom key is an
%%    object (only the first element is inspected); atom keys are converted
%%    to binaries and elements that are not pairs are dropped
%%  - any other list is an array
%%  - {{json, JSON}} is already encoded JSON and is emitted as is
%%  - {[]} is the empty object, {[{K, V} | _]} is handled like the bare list
%%  - everything else (primitives, maps) uses the default encoding
encoder([{FirstKey, _} | _] = Pairs, Encode)
  when is_binary(FirstKey) orelse is_atom(FirstKey) ->
  Object = [{convert_key(Key), Val} || {Key, Val} <- Pairs],
  json:encode_key_value_list(Object, Encode);
encoder(Items, Encode) when is_list(Items) ->
  json:encode_value(Items, Encode);
encoder({{json, Raw}}, _Encode) ->
  iolist_to_binary(Raw);
encoder({[]}, _Encode) ->
  <<"{}">>;
encoder({[{_, _} | _] = Pairs}, Encode) ->
  encoder(Pairs, Encode);
encoder(Other, Encode) ->
  json:encode_value(Other, Encode).

%% @doc Decode a JSON binary to an Erlang term.
%% Objects are returned in tuple format, which is also the jsone default.
-spec decode(binary()) -> json_value().
decode(JSON) ->
  DefaultOptions = [{object_format, tuple}],
  decode(JSON, DefaultOptions).

%% @doc Decode JSON binary to Erlang term with options.
%% Options:
%%   {object_format, tuple} - Return objects as {[{Key, Value}]} tuples
%%                            (default, jsone format)
%%   {object_format, map} - Return objects as #{Key => Value} maps
%%   {object_format, proplist} - Return objects as [{Key, Value}] proplist
-spec decode(binary(), [decode_option()]) -> json_value().
decode(JSON, Options) ->
  Provider = get_provider(),
  if
    Provider =:= json ->
      %% the native decoder always returns objects as maps, so they are
      %% converted to the requested object format afterwards
      Bin = iolist_to_binary(JSON),
      Format = proplists:get_value(object_format, Options, tuple),
      convert_object_format(json:decode(Bin), Format);
    true ->
      apply(Provider, decode, [JSON, Options])
  end.

%%%_* Internal functions =======================================================

%% @private Tell whether a list represents an object (key-value list).
%% Only the first element is inspected: it has to be a pair whose key is a
%% binary or an atom. The empty list is not an object.
-spec is_key_value_list([term()]) -> boolean().
is_key_value_list([{Key, _} | _]) ->
  is_binary(Key) orelse is_atom(Key);
is_key_value_list(_) ->
  false.

%% @private Convert key to binary format (native json requires binary keys)
-spec convert_key(atom() | binary()) -> binary().
convert_key(Key) when is_binary(Key) ->
  Key;
convert_key(Key) when is_atom(Key) ->
  atom_to_binary(Key, utf8).

%% @private Convert decoded JSON value's object format
%% jsone with {object_format, tuple} returns objects as {Fields}
%% where Fields is [{Key, Value}]
%% native json returns objects as #{Key => Value} maps
-spec convert_object_format(json_value(), tuple | map | proplist) ->
        json_value().
convert_object_format(Object, Format) when is_map(Object) ->
  %% Maps do not keep the key order of the JSON text, so the order of the
  %% pairs produced for tuple and proplist may differ from the input order.
  case Format of
    map ->
      Object;
    tuple ->
      %% jsone format: {[{Key, Value}]}
      {convert_values(maps:to_list(Object), Format)};
    proplist ->
      convert_values(maps:to_list(Object), Format)
  end;
convert_object_format({Fields}, Format) when is_list(Fields) ->
  %% already in jsone tuple format, only the values need converting
  {convert_values(Fields, Format)};
convert_object_format(List, Format) when is_list(List) ->
  case is_key_value_list(List) of
    true ->
      %% a proplist: convert its values
      convert_values(List, Format);
    false ->
      %% a JSON array: convert its elements
      [convert_object_format(Item, Format) || Item <- List]
  end;
convert_object_format(Scalar, _Format) ->
  %% null, boolean, number or binary
  Scalar.

%% Convert the value of every {Key, Value} pair, keeping the keys as they are.
convert_values(Pairs, Format) ->
  [{Key, convert_object_format(Value, Format)} || {Key, Value} <- Pairs].

%% Return the first module of the list that answers module_info(module).
%% Any failure of that call moves on to the next candidate; an exhausted
%% list is an error.
get_available_provider([]) ->
  error(erlavro_no_json_provider);
get_available_provider([Candidate | Others]) ->
  try apply(Candidate, module_info, [module]) of
    Name -> Name
  catch
    _:_ -> get_available_provider(Others)
  end.
219 | 220 | %%%_* Emacs ==================================================================== 221 | %%% Local Variables: 222 | %%% allout-layout: t 223 | %%% erlang-indent-level: 2 224 | %%% End: 225 | -------------------------------------------------------------------------------- /test/avro_schema_store_tests.erl: -------------------------------------------------------------------------------- 1 | %% coding: latin-1 2 | %%%------------------------------------------------------------------- 3 | %%% Copyright (c) 2013-2018 Klarna AB 4 | %%% 5 | %%% This file is provided to you under the Apache License, 6 | %%% Version 2.0 (the "License"); you may not use this file 7 | %%% except in compliance with the License. You may obtain 8 | %%% a copy of the License at 9 | %%% 10 | %%% http://www.apache.org/licenses/LICENSE-2.0 11 | %%% 12 | %%% Unless required by applicable law or agreed to in writing, 13 | %%% software distributed under the License is distributed on an 14 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %%% KIND, either express or implied. See the License for the 16 | %%% specific language governing permissions and limitations 17 | %%% under the License. 18 | %%% 19 | %%%------------------------------------------------------------------- 20 | -module(avro_schema_store_tests). 21 | 22 | -include_lib("eunit/include/eunit.hrl"). 

%% get_all_types/1 lists the flattened types of a store; the same expectation
%% is checked against three differently configured stores.
get_all_types_test() ->
  TestFun =
    fun(Store) ->
        Store1 =
          avro_schema_store:add_type("assigned.name", test_record(), Store),
        ?assertEqual(
           [ avro_fixed:type("MyFixed", 16, [{namespace, "com.klarna.test.bix"}])
           , avro_enum:type("MyEnum", ["A"], [{namespace, "another.name"}])
           , flat_test_record()
           , flat_sub_record()
           ], avro_schema_store:get_all_types(Store1)),
        %% Close the handle returned by add_type/3, as add_type_test does,
        %% instead of the handle the store had before the type was added.
        ok = avro_schema_store:close(Store1)
    end,
  ok = TestFun(avro_schema_store:new([{name, ?MODULE}])),
  ok = TestFun(avro_schema_store:new([dict])),
  ok = TestFun(avro_schema_store:new([map])).

%% is_store/1 accepts every kind of store created by new/1 and rejects a
%% binary.
is_store_test() ->
  ?assertNot(avro_schema_store:is_store(<<"json">>)),
  ?assert(avro_schema_store:is_store(avro_schema_store:new([dict]))),
  ?assert(avro_schema_store:is_store(avro_schema_store:new([map]))),
  ?assert(avro_schema_store:is_store(avro_schema_store:new([]))).

%% ensure_store/1 raises for a string and returns the other terms unchanged.
ensure_store_test() ->
  %% any exception class counts; `throw' is only the marker atom used here
  Outcome = try
              avro_schema_store:ensure_store("not a store")
            catch
              _:_ -> throw
            end,
  ?assertEqual(throw, Outcome),
  ?assertEqual(1, avro_schema_store:ensure_store(1)),
  ?assertEqual(atom, avro_schema_store:ensure_store(atom)),
  ?assertEqual({dict, dict:new()},
               avro_schema_store:ensure_store({dict, dict:new()})),
  ?assertEqual(#{}, avro_schema_store:ensure_store(#{})).

%% avro:flatten_type/1 returns the type with nested named types replaced by
%% their full names, together with the list of extracted named types.
flatten_type_test() ->
  Type = avro_array:type(test_record()),
  Expected =
    { avro_array:type("com.klarna.test.bix.TestRecord")
    , [ flat_test_record()
      , flat_sub_record()
      , avro_enum:type("MyEnum", ["A"], [{namespace, "another.name"}])
      , avro_fixed:type("MyFixed", 16, [{namespace, "com.klarna.test.bix"}])
      ]
    },
  ?assertEqual(Expected, avro:flatten_type(Type)).

%% A type added to a store can be looked up by its full name and by its
%% aliases, and so can the named types nested in it.
add_type_test() ->
  TestFun =
    fun(Store0) ->
        Store = avro_schema_store:add_type(test_record(), Store0),
        lists:foreach(
          fun({FullName, Expected}) ->
              ?assertEqual({ok, Expected}, lookup(FullName, Store))
          end,
          [ {"com.klarna.test.bix.TestRecord", flat_test_record()}
          , {"com.klarna.test.bix.TestRecordAlias1", flat_test_record()}
          , {"com.klarna.test.bix.TestSubRecord", flat_sub_record()}
          , {"com.klarna.test.bix.TestSubRecordAlias", flat_sub_record()}
          ]),
        ok = avro_schema_store:close(Store)
    end,
  ok = TestFun(avro_schema_store:new()),
  ok = TestFun(avro_schema_store:new([dict])),
  ok = TestFun(avro_schema_store:new([map])).

%% Shorthand for avro_schema_store:lookup_type/2.
lookup(FullName, Store) ->
  avro_schema_store:lookup_type(FullName, Store).

%% A store created with a name and a schema file is referred to by that name.
import_test() ->
  Store = avro_schema_store:new([{name, ?MODULE}],
                                [test_data("interop.avsc")]),
  ?assertEqual(?MODULE, Store),
  ets:delete(Store),
  ok.

%% An unnamed type (a union) read from a file can be looked up under the
%% file's base name, while importing the same JSON directly is an error.
import_unnamed_test() ->
  TypeName = "com.klarna.test.union",
  Path = test_data(TypeName ++ ".avsc"),
  Union = avro_union:type([null, long]),
  Json = avro_json_encoder:encode_type(Union),
  ok = file:write_file(Path, Json),
  try
    Store = avro_schema_store:new([], [Path]),
    ?assertError({unnamed_type, Union},
                 avro_schema_store:import_schema_json(Json, Store)),
    ?assertEqual({ok, Union},
                 avro_schema_store:lookup_type(TypeName, Store))
  after
    file:delete(Path)
  end.

%% Adding the same type twice under one name is fine, adding a different type
%% under that name is a name clash.
name_clash_test() ->
  Name = <<"com.klarna.test.union">>,
  Union = avro_union:type([null, long]),
  Other = avro_primitive:string_type(),
  Store = avro_schema_store:new([]),
  %% ok to add the type
  Store = avro_schema_store:add_type(Name, Union, Store),
  %% ok to add the exact type again
  Store = avro_schema_store:add_type(Name, Union, Store),
  ?assertError({name_clash, Name, Other, Union},
               avro_schema_store:add_type(Name, Other, Store)).

%% Importing a file that does not exist is an error carrying the file name
%% and the posix reason.
import_failure_test() ->
  Missing = "no-such-file",
  ?assertError({failed_to_read_schema_file, Missing, enoent},
               avro_schema_store:import_file(Missing, ignore)).

%% Expanding a stored type gives back the schema it was imported from.
expand_type_test() ->
  FullName = "org.apache.avro.Interop",
  AvscFile = test_data("interop.avsc"),
  Store = avro_schema_store:new([], [AvscFile]),
  {ok, FlatType} = avro_schema_store:lookup_type(FullName, Store),
  {ok, TruthJSON} = file:read_file(AvscFile),
  TruthType = avro_json_decoder:decode_schema(TruthJSON),
  Expanded = avro:expand_type(FullName, Store),
  %% compare decoded types instead of JSON schemas because
  %% the order of JSON object fields is not deterministic
  ?assertEqual(TruthType, Expanded),
  %% also try to expand a flattened wrapper type, which should
  %% have the exact same effect as expanding from its fullname
  ?assertEqual(TruthType, avro:expand_type(FlatType, Store)),
  ok.

%% @private Sub-record with its enum field type defined inline.
sub_record() ->
  MyEnum = avro_enum:type("MyEnum", ["A"], [{namespace, "another.name"}]),
  Fields = [ define_field("sub_field1", boolean)
           , define_field("sub_field2", MyEnum)
           ],
  avro_record:type("TestSubRecord", Fields,
                   [ {namespace, "com.klarna.test.bix"}
                   , {doc, "Some doc"}
                   , {aliases, ["TestSubRecordAlias"]}
                   ]).

%% @private Same as sub_record/0, with the nested enum replaced by its
%% full name.
flat_sub_record() ->
  Fields =
    [ define_field("sub_field1", boolean)
    , define_field("sub_field2", "another.name.MyEnum")
    ],
  Options =
    [ {namespace, "com.klarna.test.bix"}
    , {doc, "Some doc"}
    , {aliases, ["TestSubRecordAlias"]}
    ],
  avro_record:type("TestSubRecord", Fields, Options).

%% @private Record with a primitive field, a deeply nested field
%% (array of union of string / record / fixed) and a field that refers
%% to a named type by its full name.
test_record() ->
  MyFixed = avro_fixed:type("MyFixed", 16,
                            [{namespace, "com.klarna.test.bix"}]),
  Members = [string, sub_record(), MyFixed],
  Fields =
    [ %% simple type
      define_field("field1", int)
      %% huge nested type
    , define_field("field2", avro_array:type(avro_union:type(Members)))
      %% named type referred to by its full name
    , define_field("field3", "com.klarna.test.bix.SomeType")
    ],
  Options =
    [ {namespace, "com.klarna.test.bix"}
    , {doc, "Some doc"}
    , {aliases, ["TestRecordAlias1", "TestRecordAlias2"]}
    ],
  avro_record:type("TestRecord", Fields, Options).

%% @private Same as test_record/0, with the nested named types replaced
%% by their full names.
flat_test_record() ->
  Members =
    [ string
    , "com.klarna.test.bix.TestSubRecord"
    , "com.klarna.test.bix.MyFixed"
    ],
  Fields =
    [ %% simple type
      define_field("field1", int)
      %% nested type, members given by name
    , define_field("field2", avro_array:type(avro_union:type(Members)))
      %% named type referred to by its full name
    , define_field("field3", "com.klarna.test.bix.SomeType")
    ],
  Options =
    [ {namespace, "com.klarna.test.bix"}
    , {doc, "Some doc"}
    , {aliases, ["TestRecordAlias1", "TestRecordAlias2"]}
    ],
  avro_record:type("TestRecord", Fields, Options).

%% Path of a file under the test/data directory of the erlavro application.
test_data(FileName) ->
  LibDir = code:lib_dir(erlavro),
  filename:join([LibDir, "test", "data", FileName]).

%% Shorthand for avro_record:define_field/2.
define_field(Name, Type) ->
  avro_record:define_field(Name, Type).
222 | 223 | %%%_* Emacs ==================================================================== 224 | %%% Local Variables: 225 | %%% allout-layout: t 226 | %%% erlang-indent-level: 2 227 | %%% End: 228 | -------------------------------------------------------------------------------- /src/avro_binary_encoder.erl: -------------------------------------------------------------------------------- 1 | %% coding: latin-1 2 | %%%----------------------------------------------------------------------------- 3 | %%% 4 | %%% Copyright (c) 2016-2018 Klarna Bank AB (publ) 5 | %%% 6 | %%% This file is provided to you under the Apache License, 7 | %%% Version 2.0 (the "License"); you may not use this file 8 | %%% except in compliance with the License. You may obtain 9 | %%% a copy of the License at 10 | %%% 11 | %%% http://www.apache.org/licenses/LICENSE-2.0 12 | %%% 13 | %%% Unless required by applicable law or agreed to in writing, 14 | %%% software distributed under the License is distributed on an 15 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | %%% KIND, either express or implied. See the License for the 17 | %%% specific language governing permissions and limitations 18 | %%% under the License. 19 | %%% 20 | %%% @doc 21 | %%% Encodes Avro values to binary format according to Avro 1.7.5 22 | %%% specification. 23 | %%% 24 | %%% Schema is written following parsing canonical form recommendations 25 | %%% but keeps all information (attributes are kept even if they are 26 | %%% not relevant for parsing). 27 | %%% @end 28 | %%%----------------------------------------------------------------------------- 29 | 30 | -module(avro_binary_encoder). 31 | 32 | %% APIs for typed data encoding 33 | -export([ encode_value/1 34 | , encode/3 35 | ]). 36 | 37 | -include("avro_internal.hrl"). 38 | 39 | %% Exported for test 40 | -export([ int/1 41 | , long/1 42 | , string/1 43 | , zigzag/2 44 | ]). 45 | 46 | -type index() :: non_neg_integer(). 
%% zero based

%%%_* APIs =====================================================================

%% @doc Encode a wrapped avro value in binary format.
%% A value that already carries its binary encoding is returned as is,
%% every other value is dispatched on the kind of its type.
%% @end
-spec encode_value(avro_value()) -> iodata().
encode_value(?AVRO_ENCODED_VALUE_BINARY(_Type, _Value = Encoded)) ->
  Encoded;
encode_value(Prim) when ?IS_PRIMITIVE_TYPE(?AVRO_VALUE_TYPE(Prim)) ->
  PrimType = ?AVRO_VALUE_TYPE(Prim),
  encode_prim(PrimType, ?AVRO_VALUE_DATA(Prim));
encode_value(Record) when ?IS_RECORD_VALUE(Record) ->
  %% only the field values are written, one after the other
  Fields = avro_record:to_list(Record),
  [encode_value(FieldValue) || {_FieldName, FieldValue} <- Fields];
encode_value(Enum) when ?IS_ENUM_VALUE(Enum) ->
  %% an enum is written as the index of its symbol
  int(avro_enum:get_index(Enum));
encode_value(Array) when ?IS_ARRAY_VALUE(Array) ->
  Items = ?AVRO_VALUE_DATA(Array),
  block(length(Items), [encode_value(Item) || Item <- Items]);
encode_value(Map) when ?IS_MAP_VALUE(Map) ->
  Pairs = avro_map:to_list(Map),
  PairCount = length(Pairs),
  block(PairCount, [[string(Key), encode_value(Val)] || {Key, Val} <- Pairs]);
encode_value(Fixed) when ?IS_FIXED_VALUE(Fixed) ->
  %% fixed data is written verbatim
  ?AVRO_VALUE_DATA(Fixed);
encode_value(Union) when ?IS_UNION_VALUE(Union) ->
  %% a union is written as the member index followed by the member value
  Member = ?AVRO_VALUE_DATA(Union),
  MemberIndex = avro_union:get_child_type_index(Union),
  [long(MemberIndex), encode_value(Member)].

%% @doc Encode unwrapped (raw) values directly without (possibly
%% recursive) type info wrapped with values.
%% i.e. data can be recursive, but recursive types are resolved by
%% schema lookup
%% @end
-spec encode(avro:schema_all(), type_or_name(), avro_value() | avro:in()) ->
        iodata().
encode(Sc, Type, Input) ->
  do_encode(avro_util:ensure_lkup_fun(Sc), Type, Input).

%%%_* Internal functions =======================================================

%% Tested in OTP-21, dialyzer had trouble understanding the 3 arg
%% for the call to enc/3.
-dialyzer({nowarn_function, [do_encode/3]}).
%% Entry point of the recursive encoding.
%% A wrapped value whose type is the same as the expected type is encoded
%% from its own type info; otherwise the expected type drives the encoding.
%% A type given by name is resolved through the lookup fun first.
-spec do_encode(lkup_fun(), type_or_name(), avro_value() | avro:in()) ->
        iodata().
do_encode(Lkup, Type, #avro_value{type = ValueType} = Wrapped) ->
  case avro:is_same_type(Type, ValueType) of
    true  -> encode_value(Wrapped);
    false -> enc(Lkup, Type, Wrapped) %% try deeper
  end;
do_encode(Lkup, TypeName, Input) when ?IS_NAME_RAW(TypeName) ->
  Resolved = Lkup(?NAME(TypeName)),
  enc(Lkup, Resolved, Input);
do_encode(Lkup, Type, Input) ->
  enc(Lkup, Type, Input).

%% Encode a value according to the given (already resolved) type.
%% Children of complex types are encoded by calling back into encode/3.
-spec enc(schema_store() | lkup_fun(), type_or_name(),
          avro_value() | avro:in()) -> iodata().
enc(_Lkup, Type, Input) when ?IS_PRIMITIVE_TYPE(Type) ->
  {ok, Wrapped} = avro:cast(Type, Input),
  encode_value(Wrapped);
enc(Lkup, Type, Input) when ?IS_RECORD_TYPE(Type) ->
  EncodeField =
    fun(_FieldName, FieldType, FieldValue) ->
      encode(Lkup, FieldType, FieldValue)
    end,
  avro_record:encode(Type, Input, EncodeField);
enc(_Lkup, Type, Input) when ?IS_ENUM_TYPE(Type) ->
  int(avro_enum:get_index(Type, Input));
enc(Lkup, Type, Input) when ?IS_ARRAY_TYPE(Type) ->
  ItemCount = length(Input),
  EncodeItem = fun(ItemType, Item) -> encode(Lkup, ItemType, Item) end,
  block(ItemCount, avro_array:encode(Type, Input, EncodeItem));
enc(Lkup, Type, Input) when ?IS_MAP_TYPE(Type) ->
  EncodePair =
    fun(ItemType, Key, Val) ->
      [string(Key), encode(Lkup, ItemType, Val)]
    end,
  Payload = avro_map:encode(Type, Input, EncodePair),
  %% the input is either a map() or a list of key-value pairs
  PairCount =
    case is_map(Input) of
      true  -> maps:size(Input);
      false -> length(Input)
    end,
  block(PairCount, Payload);
enc(_Lkup, Type, Input) when ?IS_FIXED_TYPE(Type) ->
  %% force binary size check for the value
  encode_value(avro_fixed:new(Type, Input));
enc(Lkup, Type, Input) when ?IS_UNION_TYPE(Type) ->
  EncodeMember =
    fun(MemberType, MemberValue, MemberIndex) ->
      [long(MemberIndex), encode(Lkup, MemberType, MemberValue)]
    end,
  avro_union:encode(Type, Input, EncodeMember).

-spec encode_prim(avro_type(), avro:in()) -> iodata().
encode_prim(Type, _Data) when ?IS_NULL_TYPE(Type)    -> null();
encode_prim(Type, Data)  when ?IS_BOOLEAN_TYPE(Type) -> bool(Data);
encode_prim(Type, Data)  when ?IS_INT_TYPE(Type)     -> int(Data);
encode_prim(Type, Data)  when ?IS_LONG_TYPE(Type)    -> long(Data);
encode_prim(Type, Data)  when ?IS_FLOAT_TYPE(Type)   -> float(Data);
encode_prim(Type, Data)  when ?IS_DOUBLE_TYPE(Type)  -> double(Data);
encode_prim(Type, Data)  when ?IS_BYTES_TYPE(Type)   -> bytes(Data);
encode_prim(Type, Data)  when ?IS_STRING_TYPE(Type)  -> string(Data).

%% Encode blocks, for arrays and maps
%% 1. Blocks start with a 'long' type count
%% 2. If count is negative (abs value for real count), it should be followed by
%%    a 'long' type data size
%% 3. A series of blocks ends with a zero-count block
%%
%% In the erlavro implementation, non-empty content is always encoded as a
%% single block with negative count followed by the payload size in bytes.
%%
%% This block size permits fast skipping through data,
%% e.g., when projecting a record to a subset of its fields.
%%
%% The blocked representation also permits one to read and write arrays/maps
%% larger than can be buffered in memory, since one can start writing items
%% without knowing the full length of the array/map.
%%
%% We are not trying to optimise memory usage here
%% (hard to do so with the current erlavro typing mechanism
%% because it requires everything in memory already).
%% This is however beneficial when concatenating large lists which have chunks
%% encoded in different processes etc.
-spec block(index(), iodata()) -> iodata().
block(0, []) ->
  %% nothing to write but the terminating zero-count block
  [0];
block(ItemCount, Payload) when is_binary(Payload) ->
  CountBytes = long(-ItemCount),
  SizeBytes = long(byte_size(Payload)),
  [iolist_to_binary([CountBytes, SizeBytes]), Payload, 0];
block(ItemCount, Payload) ->
  %% flatten first, the payload size in bytes is part of the block header
  block(ItemCount, iolist_to_binary(Payload)).

%% null is encoded as zero bytes
-spec null() -> binary().
null() -> <<>>.

-spec bool(boolean()) -> <<_:8>>.
%% A boolean is written as a single byte, 0 for false and 1 for true.
bool(false) -> <<0>>;
bool(true)  -> <<1>>.

%% int and long are zig-zag encoded and then written in the
%% variable-length format, see zigzag/2 and varint/1.
-spec int(integer()) -> iodata().
int(Int) ->
  ZzInt = zigzag(int, Int),
  varint(ZzInt).

-spec long(integer()) -> iodata().
long(Long) ->
  ZzLong = zigzag(long, Long),
  varint(ZzLong).

%% float/1 is defined locally, do not auto-import erlang:float/1
-compile({no_auto_import, [float/1]}).
%% A float is written as 4 bytes: 32-bit IEEE 754, little-endian.
-spec float(float()) -> binary().
float(Float) when is_float(Float) ->
  <<Float:32/little-float>>.

%% A double is written as 8 bytes: 64-bit IEEE 754, little-endian.
-spec double(float()) -> binary().
double(Double) when is_float(Double) ->
  <<Double:64/little-float>>.

%% Bytes are written as a long byte-count followed by the data itself.
-spec bytes(binary()) -> iodata().
bytes(Data) when is_binary(Data) ->
  [long(byte_size(Data)), Data].

%% A string is written like bytes: long byte-count followed by the data.
%% Atoms and iolists are converted to binary first.
-spec string(atom() | iodata()) -> iodata().
string(Atom) when is_atom(Atom) ->
  string(atom_to_binary(Atom, utf8));
string(String) when is_list(String) ->
  %% NOTE: deliberately not unicode:characters_to_binary(String),
  %%       we do not want to deal with utf8 in erlavro
  string(iolist_to_binary(String));
string(String) when is_binary(String) ->
  [long(byte_size(String)), String].

%% ZigZag encode/decode
%% https://developers.google.com/protocol-buffers/docs/encoding?&csw=1#types
-compile({inline, [zigzag/2]}).
-spec zigzag(int | long, integer()) -> integer().
zigzag(int, Int)  -> (Int bsl 1) bxor (Int bsr 31);
zigzag(long, Int) -> (Int bsl 1) bxor (Int bsr 63).

%% Variable-length format
%% http://lucene.apache.org/core/3_5_0/fileformats.html#VInt
%% 7 bits per byte, least significant group first; the high bit of a byte
%% is set when more bytes follow.
-spec varint(integer()) -> iodata().
varint(I) when I =< 127 -> [I];
varint(I) -> [128 + (I band 127) | varint(I bsr 7)].
225 | 226 | %%%_* Emacs ==================================================================== 227 | %%% Local Variables: 228 | %%% allout-layout: t 229 | %%% erlang-indent-level: 2 230 | %%% End: 231 | -------------------------------------------------------------------------------- /src/avro_decoder_hooks.erl: -------------------------------------------------------------------------------- 1 | %%%----------------------------------------------------------------------------- 2 | %%% 3 | %%% Copyright (c) 2016-2017 Klarna AB 4 | %%% 5 | %%% This file is provided to you under the Apache License, 6 | %%% Version 2.0 (the "License"); you may not use this file 7 | %%% except in compliance with the License. You may obtain 8 | %%% a copy of the License at 9 | %%% 10 | %%% http://www.apache.org/licenses/LICENSE-2.0 11 | %%% 12 | %%% Unless required by applicable law or agreed to in writing, 13 | %%% software distributed under the License is distributed on an 14 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %%% KIND, either express or implied. See the License for the 16 | %%% specific language governing permissions and limitations 17 | %%% under the License. 18 | %%% 19 | %%%----------------------------------------------------------------------------- 20 | 21 | %% @doc This module is a collection of `erlavro' supported decoder hooks 22 | %% 23 | %% Decoder hook is an anonymous function to be evaluated by 24 | %% the JSON or binary decoder to amend either schema or data (input or output). 25 | %% 26 | %% For example: 27 | %% 28 | %% A hook can be used to fast-skip undesired data fields of records 29 | %% or undesired data of big maps etc. 30 | %% e.g. To dig out only the field named "MyField" in "MyRecord", the 31 | %% JSON decoder hook may probably look like: 32 | %% 33 | %%
 34 | %% fun(Type, SubNameOrIndex, Data, DecodeFun) ->
 35 | %%      case {avro:get_type_fullname(Type), SubNameOrIndex} of
 36 | %%        {"com.example.MyRecord", "MyField"} ->
 37 | %%          DecodeFun(Data);
 38 | %%        {"com.example.MyRecord", _OtherFields} ->
 39 | %%          ignored;
 40 | %%        _OtherType ->
 41 | %%          DecodeFun(Data)
 42 | %%      end
 43 | %% end.
 44 | %% 
45 | %% 46 | %% A hook can be used for debugging. For example, the hook below prints 47 | %% the decoding stack as the decode function traverses through the bytes. 48 | %% 49 | %%
 50 | %% fun(Type, SubNameOrIndex, Data, DecodeFun) ->
 51 | %%      SubInfo = case is_integer(SubNameOrIndex) of
 52 | %%                  true  -> integer_to_list(SubNameOrIndex);
 53 | %%                  false -> SubNameOrIndex
 54 | %%                end,
 55 | %%      io:format("~s.~s\n", [avro:get_type_name(Type), SubInfo]),
 56 | %%      DecodeFun(Data)
 57 | %% end
 58 | %% 
%%
%% A hook can also be used as a monkey patch to fix some corrupted data.
%% @end

-module(avro_decoder_hooks).

-export([ tag_unions/0
        , pretty_print_hist/0
        , print_debug_trace/2
        ]).

-include("erlavro.hrl").

%% process dictionary keys used by the hooks in this module
-define(PD_PP_INDENTATION, '$avro_decoder_pp_indentation').
-define(PD_DECODER_HIST, '$avro_decoder_hist').
%% tag wrapped around an error reason which has already been reported
-define(REASON_TAG, '$hook-raised').

-type count() :: non_neg_integer().
-type trace_hist_entry() :: {push, _, _} | {pop, _} | pop.

%% @doc By default, decoders do not tag union values.
%% This hook tags decoded union values with the name of the union member.
%% NOTE: values of primitive members (null included), arrays and maps
%%       are not tagged
%% @end
-spec tag_unions() -> avro:decoder_hook_fun().
tag_unions() -> fun tag_unions/4.

%% @doc This hook is useful when a decoder has failed on decoding,
%% try to decode it again with this hook to inspect the decode history
%% and the avro type stack where the failure happened.
%% `PrintFun' is called with the formatted trace, of which at most
%% `MaxHistoryLength' history lines are kept.
%% NOTE: Always call this API to retrieve the hook, never save the hook
%%       and re-use for different decode attempts
%% @end
-spec print_debug_trace(fun((iodata()) -> ok), count()) ->
        avro:decoder_hook_fun().
print_debug_trace(PrintFun, MaxHistoryLength) ->
  %% start from a clean history, it is kept in the process dictionary
  ok = erase_hist(),
  fun(Type, SubInfo, Input, DecodeFun) ->
    print_trace_on_failure(Type, SubInfo, Input, DecodeFun,
                           PrintFun, MaxHistoryLength)
  end.

%% @doc This hook prints the type tree with indentation, and the leaf values
%% to the `user' I/O device.
%% @end
-spec pretty_print_hist() -> avro:decoder_hook_fun().
pretty_print_hist() ->
  %% the nesting depth is tracked in the process dictionary, reset it
  _ = erase(?PD_PP_INDENTATION),
  fun(Type, SubInfo, Data, DecodeFun) ->
    TypeName = avro:get_type_fullname(Type),
    Depth =
      case get(?PD_PP_INDENTATION) of
        undefined -> 0;
        Saved     -> Saved
      end,
    Indent = lists:duplicate(Depth * 2, $\s),
    Suffix =
      case SubInfo of
        ""                       -> ": ";
        Idx when is_integer(Idx) -> [$., integer_to_list(Idx), "\n"];
        Key when is_binary(Key)  -> [$., Key, "\n"];
        _                        -> "\n"
      end,
    io:put_chars(user, [Indent, TypeName, Suffix]),
    %% everything decoded by DecodeFun is one level deeper
    _ = put(?PD_PP_INDENTATION, Depth + 1),
    Decoded = DecodeFun(Data),
    _ = pretty_print_result(SubInfo, get_pretty_print_result(Decoded), Indent),
    _ = put(?PD_PP_INDENTATION, Depth),
    Decoded
  end.

%%%_* Internal functions =======================================================

%% @private Decode, then tag the decoded value if the type is a union.
%% Works on the result of both the binary and the JSON decoder.
tag_unions(#avro_union_type{} = Union, SubInfo, DecodeIn, DecodeFun) ->
  Decoded = DecodeFun(DecodeIn),
  MemberName = get_union_member_name(Union, SubInfo),
  tag_decoded(MemberName, Decoded);
tag_unions(_Type, _SubInfo, DecodeIn, DecodeFun) ->
  %% Not a union, pass through
  DecodeFun(DecodeIn).

%% @private
tag_decoded(MemberName, {Value, Tail}) when is_binary(Tail) ->
  %% used as binary decoder hook
  {maybe_tag(MemberName, Value), Tail};
tag_decoded(MemberName, Value) ->
  %% used as JSON decoder hook
  maybe_tag(MemberName, Value).

%% @private
get_union_member_name(Union, Index) when is_integer(Index) ->
  %% when decoding avro binary, lookup member name by union member index.
  {ok, Member} = avro_union:lookup_type(Index, Union),
  if
    is_binary(Member) -> Member;
    true              -> avro:get_type_fullname(Member)
  end;
get_union_member_name(_Union, MemberName) when is_binary(MemberName) ->
  %% when decoding JSON, the value is already tagged with union member name
  MemberName.

%% @private Never tag primitives and unnamed complex types.
maybe_tag(Name, Value) when ?IS_AVRO_PRIMITIVE_NAME(Name) -> Value;
maybe_tag(?AVRO_ARRAY, Value)                             -> Value;
maybe_tag(?AVRO_MAP, Value)                               -> Value;
maybe_tag(Name, Value)                                    -> {Name, Value}.

%% @private Record the step in the history and decode. On failure the
%% trace is printed and the error is re-raised with a tagged reason.
print_trace_on_failure(Type, Sub, Data, DecodeFun, PrintFun, HistCount) ->
  TypeName = avro:get_type_fullname(Type),
  ok = add_hist({push, TypeName, Sub}),
  try
    decode_and_add_trace(Sub, Data, DecodeFun)
  catch
    Class : Reason : Stacktrace
        when not (is_tuple(Reason) andalso
                  element(1, Reason) =:= ?REASON_TAG) ->
      %% catch only the very first error, a tagged reason means the trace
      %% has been printed by a hook call deeper in the type tree already
      ok = print_trace(PrintFun, HistCount),
      ok = erase_hist(),
      erlang:raise(Class, {?REASON_TAG, Reason}, Stacktrace)
  end.

%% @private Decode and record the matching `pop' in the history.
decode_and_add_trace(Sub, Data, DecodeFun) ->
  Decoded = DecodeFun(Data),
  Value =
    case Decoded of
      {V, Tail} when is_binary(Tail) -> V;      %% binary decoder
      _                              -> Decoded %% JSON decoder
    end,
  HistEntry =
    case Sub =:= [] orelse Value =:= [] of
      true  -> {pop, Value}; %% add stack hist with decoded value
      false -> pop
    end,
  add_hist(HistEntry),
  Decoded.

%% @private
-spec erase_hist() -> ok.
erase_hist() ->
  _ = erlang:erase(?PD_DECODER_HIST),
  ok.

%% @private The recorded history, latest entry first.
-spec get_hist() -> [trace_hist_entry()].
get_hist() ->
  case erlang:get(?PD_DECODER_HIST) of
    undefined -> [];
    Hist      -> Hist
  end.

%% @private Use process dictionary to keep the decoder stack trace.
-spec add_hist(trace_hist_entry()) -> ok.
add_hist(Entry) ->
  Hist = get_hist(),
  erlang:put(?PD_DECODER_HIST, [Entry | Hist]),
  ok.

%% @private Print decoder trace (stack and history) using the given function.
print_trace(PrintFun, HistCount) ->
  %% the history is recorded latest first, replay it in decoding order
  Chronological = lists:reverse(get_hist()),
  {Stack, History} = format_trace(Chronological, [], [], HistCount),
  PrintFun(["avro type stack:\n", Stack, "\n",
            "decode history:\n", History]).

%% @private Format the trace history into printable format.
%% Return the type stack and last N decode history entries as iodata().
%% @end
-spec format_trace(TraceHist :: [trace_hist_entry()],
                   TypeStack :: [{avro:name(), atom() | string() | integer()}],
                   FormattedTrace :: iodata(),
                   MaxHistEntryCount :: count()) -> {iodata(), iodata()}.
format_trace([], Stack, Lines, _MaxLines) ->
  {io_lib:format("~p", [lists:reverse(Stack)]), lists:reverse(Lines)};
format_trace([{push, Name, Sub} | Rest], Stack, Lines, MaxLines) ->
  Suffix =
    case Sub of
      []                   -> "";
      none                 -> "";
      I when is_integer(I) -> [".", integer_to_list(I)];
      S when is_binary(S)  -> [".", S]
    end,
  Line = bin([trace_padding(Stack), Name, Suffix, "\n"]),
  format_trace(Rest, [{Name, Sub} | Stack],
               keep_latest(Line, Lines, MaxLines), MaxLines);
format_trace([{pop, Value} | Rest], Stack, Lines, MaxLines) ->
  Line = bin([trace_padding(Stack), io_lib:format("~100000p", [Value]), "\n"]),
  NewLines = keep_latest(Line, Lines, MaxLines),
  format_trace(Rest, tl(Stack), NewLines, MaxLines);
format_trace([pop | Rest], Stack, Lines, MaxLines) ->
  format_trace(Rest, tl(Stack), Lines, MaxLines).

%% @private Two spaces per level of the current type stack.
trace_padding(Stack) ->
  lists:duplicate(length(Stack) * 2, $\s).

%% @private Add a line to the (latest first) history and drop the oldest
%% lines when there are more than MaxLines.
keep_latest(Line, Lines, MaxLines) ->
  lists:sublist([Line | Lines], MaxLines).

%% @private
bin(IoData) -> iolist_to_binary(IoData).

%% @private Extract the decoded value to print from a decode result,
%% which is either a wrapped value or a {Value, TailBinary} tuple.
get_pretty_print_result(JsonResult) when ?IS_AVRO_VALUE(JsonResult) ->
  %% JSON value passed to hooks is always wrapped
  ?AVRO_VALUE_DATA(JsonResult);
get_pretty_print_result({Result, Tail}) when is_binary(Tail) ->
  %% binary decode result
  Result.

%% @private Print the decoded value of a leaf in the type tree to the
%% `user' device. Leaves are the hook calls made with "" as sub-info;
%% for these the type name has been printed with a trailing ": " and no
%% line terminator, so the value has to end the line. Without the newline
%% the next type name would be printed on the same line as this value.
pretty_print_result(_Sub = [], Result, _IndentationStr) ->
  io:put_chars(user, [io_lib:print(Result), "\n"]);
pretty_print_result(_Sub, _Result, _IndentationStr) ->
  ok.

%%%_* Emacs ====================================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End:
--------------------------------------------------------------------------------
/src/avro_binary_decoder.erl:
--------------------------------------------------------------------------------
%% coding: latin-1
%%%-----------------------------------------------------------------------------
%%%
%%% Copyright (c) 2016-2018 Klarna AB
%%%
%%% This file is provided to you under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file
%%% except in compliance with the License. You may obtain
%%% a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing,
%%% software distributed under the License is distributed on an
%%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%%% KIND, either express or implied. See the License for the
%%% specific language governing permissions and limitations
%%% under the License.
%%%
%%% @doc
%%% Decode Avro values from binary format according to Avro 1.7.5
%%% specification.
%%%
%%% Schema is written following parsing canonical form recommendations
%%% but keeps all information (attributes are kept even if they are
%%% not relevant for parsing).
%%% @end
%%%-----------------------------------------------------------------------------

-module(avro_binary_decoder).

-export([ decode/3
        , decode/4
        , decode_stream/3
        , decode_stream/4
        ]).

%% Exported for test
-export([zigzag/1]).

-include("avro_internal.hrl").

-type hook() :: decoder_hook_fun().
-type index() :: pos_integer().
-type block_item_decode_fun() ::
        fun((index(), binary()) -> {avro:out(), binary()}).

%%%_* APIs =====================================================================

%% @doc Same as decode/4, with default decoder options.
-spec decode(iodata(), type_or_name(),
             schema_store() | lkup_fun()) -> avro:out().
decode(IoData, Type, StoreOrLkupFun) ->
  DefaultOptions = avro:make_decoder_options([]),
  decode(IoData, Type, StoreOrLkupFun, DefaultOptions).


%% @doc Decode bytes into unwrapped avro value, assuming the input bytes
%% match the given schema without trailing bytes.
%% @end
-spec decode(iodata(), type_or_name(),
             schema_store() | lkup_fun(),
             decoder_options()) -> avro:out().
decode(IoData, Type, StoreOrLkupFun, Options) ->
  LookupFun = avro_util:ensure_lkup_fun(StoreOrLkupFun),
  %% the match on <<>> asserts that all input bytes have been consumed;
  %% the decoded value is returned as raw erlang term directly
  {Decoded, <<>>} = do_decode(IoData, Type, LookupFun, Options),
  Decoded.

%% @doc Same as decode_stream/4, with the default decoder hook.
-spec decode_stream(iodata(), type_or_name(),
                    schema_store() | lkup_fun()) ->
        {avro:out(), binary()}.
decode_stream(IoData, Type, StoreOrLkupFun) ->
  decode_stream(IoData, Type, StoreOrLkupFun, ?DEFAULT_DECODER_HOOK).

%% @doc Decode the header of a byte stream, return unwrapped value and tail
%% bytes in a tuple. The last argument is either a decoder hook fun or
%% a decoder options map.
%% @end
-spec decode_stream(iodata(), type_or_name(),
                    schema_store() | lkup_fun(), hook() | decoder_options()) ->
        {avro:out(), binary()}.
decode_stream(IoData, Type, StoreOrLkupFun, Hook) when is_function(Hook) ->
  %% a bare hook fun is turned into decoder options first
  Options = avro:make_decoder_options([{hook, Hook}]),
  decode_stream(IoData, Type, StoreOrLkupFun, Options);
decode_stream(IoData, Type, StoreOrLkupFun, Options) when is_map(Options) ->
  LookupFun = avro_util:ensure_lkup_fun(StoreOrLkupFun),
  do_decode(IoData, Type, LookupFun, Options).

%%%_* Internal functions =======================================================

%% @private Normalise the input to a binary and the type to a resolved
%% type before handing over to dec/4. The options must carry a hook fun
%% of arity 4.
-spec do_decode(iodata(), type_or_name(), lkup_fun(),
                decoder_options()) -> {avro:out(), binary()}.
do_decode(IoList, Type, Lkup, Options) when is_list(IoList) ->
  Bin = iolist_to_binary(IoList),
  do_decode(Bin, Type, Lkup, Options);
do_decode(Bin, TypeName, Lkup, Options) when ?IS_NAME_RAW(TypeName) ->
  Resolved = Lkup(?NAME(TypeName)),
  do_decode(Bin, Resolved, Lkup, Options);
do_decode(Bin, Type, Lkup,
          #{hook := Hook} = Options) when is_function(Hook, 4) ->
  dec(Bin, Type, Lkup, Options).

%% @private
-spec dec(binary(), avro_type(), lkup_fun(),
          decoder_options()) -> {avro:out(), binary()}.
%% Decode one value of the given (resolved) type from the head of Bin and
%% return it together with the remaining bytes.
%% Primitive, record, enum and fixed values are decoded inside a hook call
%% on the type itself; array/map items and union members get their hook
%% call from dec_item/6, with the container type as the parent.
dec(Bin, T, _Lkup, #{hook := Hook}) when ?IS_PRIMITIVE_TYPE(T) ->
  Hook(T, "", Bin, fun(B) -> prim(B, T#avro_primitive_type.name) end);
dec(Bin, T, Lkup, #{hook := Hook} = Options) when ?IS_RECORD_TYPE(T) ->
  Hook(T, none, Bin, fun(B) -> dec_record(B, T, Lkup, Options) end);
dec(Bin, T, _Lkup, #{hook := Hook}) when ?IS_ENUM_TYPE(T) ->
  %% the symbol index is read before the hook is called, so the hook gets
  %% the index as sub-info and the bytes after the index as input
  {Index, Tail} = int(Bin),
  Hook(T, Index, Tail,
       fun(B) ->
         Symbol = avro_enum:get_symbol_from_index(T, Index),
         {Symbol, B}
       end);
dec(Bin, T, Lkup, Options) when ?IS_ARRAY_TYPE(T) ->
  ItemsType = avro_array:get_items_type(T),
  ItemDecodeFun =
    fun(Index, BinIn) ->
      dec_item(T, Index, ItemsType, BinIn, Lkup, Options)
    end,
  blocks(Bin, ItemDecodeFun);
dec(Bin, T, Lkup,
    #{map_type := MapType} = Options) when ?IS_MAP_TYPE(T) ->
  ItemsType = avro_map:get_items_type(T),
  ItemDecodeFun =
    fun(_Index, BinIn) ->
      %% each map entry is a string key followed by the value
      {Key, Tail1} = prim(BinIn, ?AVRO_STRING),
      {Value, Tail} = dec_item(T, Key, ItemsType, Tail1, Lkup, Options),
      {{Key, Value}, Tail}
    end,
  {KVs, Tail} = blocks(Bin, ItemDecodeFun),
  case MapType of
    proplist -> {KVs, Tail};
    map      -> {maps:from_list(KVs), Tail}
  end;
dec(Bin, T, Lkup, Options) when ?IS_UNION_TYPE(T) ->
  %% a union value is the member index (a long) followed by the member value
  {Index, Tail} = long(Bin),
  {ok, MemberType} = avro_union:lookup_type(Index, T),
  dec_item(T, Index, MemberType, Tail, Lkup, Options);
dec(Bin, T, _Lkup, #{hook := Hook}) when ?IS_FIXED_TYPE(T) ->
  Hook(T, "", Bin,
       fun(B) ->
         %% a fixed value is exactly Size bytes, without length prefix
         Size = avro_fixed:get_size(T),
         <<Value:Size/binary, Tail/binary>> = B,
         {Value, Tail}
       end).

%% @private
-spec dec_record(binary(), record_type(), lkup_fun(),
                 decoder_options()) -> {avro:out(), binary()}.
%% Decode the fields of a record one after the other, in the order given by
%% avro_record:get_all_field_types/1. The result is a proplist or a map,
%% depending on the `record_type' decoder option.
dec_record(Bin, T, Lkup, #{record_type := RecordType} = Options) ->
  FieldTypes = avro_record:get_all_field_types(T),
  {FieldValuesReversed, Tail} =
    lists:foldl(
      fun({FieldName, FieldType}, {Values, BinIn}) ->
        {Value, BinOut} = dec_item(T, FieldName, FieldType,
                                   BinIn, Lkup, Options),
        {[{FieldName, Value} | Values], BinOut}
      end, {[], Bin}, FieldTypes),
  FieldValues1 = case RecordType of
                   proplist -> lists:reverse(FieldValuesReversed);
                   map      -> maps:from_list(FieldValuesReversed)
                 end,
  {FieldValues1, Tail}.

%% @private Common decode logic for map/array items, union members,
%% and record fields: the hook is called with the parent type and the id of
%% the item (index, key or field name), the item itself is decoded by
%% do_decode/4.
%% @end
-spec dec_item(avro_type(), name() | non_neg_integer(), type_or_name(),
               binary(), lkup_fun(), decoder_options()) ->
        {avro:out(), binary()}.
dec_item(ParentType, ItemId, ItemsType, Input, Lkup,
         #{hook := Hook} = Options) ->
  Hook(ParentType, ItemId, Input,
       fun(B) -> do_decode(B, ItemsType, Lkup, Options) end).

%% @private Decode primitive values.
%% NOTE: keep all binary decoding exceptions to error:{badmatch, _}
%%       to simplify higher level try catches when detecting error
%% @end
-spec prim(binary(), _PrimitiveName :: name()) -> {avro:out(), binary()}.
prim(Bin, ?AVRO_NULL) ->
  %% null takes no bytes
  {null, Bin};
prim(Bin, ?AVRO_BOOLEAN) ->
  %% a single byte, 1 for true
  <<Bool:8, Rest/binary>> = Bin,
  {Bool =:= 1, Rest};
prim(Bin, ?AVRO_INT) ->
  int(Bin);
prim(Bin, ?AVRO_LONG) ->
  long(Bin);
prim(Bin, ?AVRO_FLOAT) ->
  %% 4 bytes, 32-bit IEEE 754, little-endian
  <<Float:32/little-float, Rest/binary>> = Bin,
  {Float, Rest};
prim(Bin, ?AVRO_DOUBLE) ->
  %% 8 bytes, 64-bit IEEE 754, little-endian
  <<Float:64/little-float, Rest/binary>> = Bin,
  {Float, Rest};
prim(Bin, ?AVRO_BYTES) ->
  bytes(Bin);
prim(Bin, ?AVRO_STRING) ->
  %% strings are returned as binaries, without any unicode validation
  bytes(Bin).

%% @private Decode a long byte-count followed by that many bytes.
-spec bytes(binary()) -> {binary(), binary()}.
bytes(Bin) ->
  {Size, Rest} = long(Bin),
  <<Bytes:Size/binary, Tail/binary>> = Rest,
  {Bytes, Tail}.

%% @private Decode a series of blocks (array or map content) into the list
%% of decoded items. The item decode fun is given a running item index,
%% which starts from 1.
-spec blocks(binary(), block_item_decode_fun()) -> {[avro:out()], binary()}.
blocks(Bin, ItemDecodeFun) ->
  blocks(Bin, ItemDecodeFun, 1, []).

%% @private Read the header of the next block and decode its items.
-spec blocks(binary(), block_item_decode_fun(), index(), [avro:out()]) ->
        {[avro:out()], binary()}.
blocks(Bin, ItemDecodeFun, Index, Acc) ->
  case long(Bin) of
    {0, Rest} ->
      %% a series of blocks ends with a zero count
      {lists:reverse(Acc), Rest};
    {Count, Rest} when Count < 0 ->
      %% a negative count is followed by the block size in bytes,
      %% the size info is simply discarded here
      {_SizeInBytes, Items} = long(Rest),
      block(Items, ItemDecodeFun, Index, Acc, -Count);
    {Count, Items} ->
      block(Items, ItemDecodeFun, Index, Acc, Count)
  end.

%% @private Decode the given number of items of one block, then carry on
%% with the next block.
-spec block(binary(), block_item_decode_fun(),
            index(), [avro:out()], non_neg_integer()) ->
        {[avro:out()], binary()}.
block(Bin, ItemDecodeFun, Index, Acc, 0) ->
  blocks(Bin, ItemDecodeFun, Index, Acc);
block(Bin, ItemDecodeFun, Index, Acc, Remaining) ->
  {Item, Rest} = ItemDecodeFun(Index, Bin),
  block(Rest, ItemDecodeFun, Index + 1, [Item | Acc], Remaining - 1).

%% @private Decode a zig-zag varint of at most 32 payload bits.
-spec int(binary()) -> {integer(), binary()}.
int(Bin) ->
  Unsigned = varint(Bin, 0, 0, 32),
  zigzag(Unsigned).

%% @private Decode a zig-zag varint of at most 64 payload bits.
-spec long(binary()) -> {integer(), binary()}.
long(Bin) ->
  Unsigned = varint(Bin, 0, 0, 64),
  zigzag(Unsigned).

%% @private Zig-zag decode an integer, or the integer in a
%% {Integer, TailBinary} tuple.
-spec zigzag({integer(), binary()} | integer()) ->
        {integer(), binary()} | integer().
zigzag({Encoded, TailBin}) -> {zigzag(Encoded), TailBin};
zigzag(Encoded)            -> (Encoded bsr 1) bxor -(Encoded band 1).

%% @private
-spec varint(binary(), integer(), integer(), integer()) ->
        {integer(), binary()}.
265 | varint(Bin, Acc, AccBits, MaxBits) -> 266 | <<Tag:1, Value:7, Tail/binary>> = Bin, 267 | true = (AccBits < MaxBits), %% assert 268 | NewAcc = (Value bsl AccBits) bor Acc, 269 | case Tag =:= 0 of 270 | true -> {NewAcc, Tail}; 271 | false -> varint(Tail, NewAcc, AccBits + 7, MaxBits) 272 | end. 273 | 274 | %%%_* Emacs ==================================================================== 275 | %%% Local Variables: 276 | %%% allout-layout: t 277 | %%% erlang-indent-level: 2 278 | %%% End: 279 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Avro support for Erlang/Elixir (http://avro.apache.org/). 2 | 3 | Current version implements Apache Avro 1.8.1 specification. 4 | 5 | License: Apache License 2.0 6 | 7 | [![Build Status](https://travis-ci.org/klarna/erlavro.svg?branch=master)](https://travis-ci.org/klarna/erlavro) [![Coverage Status](https://coveralls.io/repos/github/klarna/erlavro/badge.svg?branch=master)](https://coveralls.io/github/klarna/erlavro?branch=master) 8 | 9 | # Avro Type and Erlang Spec Mapping 10 | 11 | ``` 12 | name_raw() :: atom() | string() | binary(). 13 | name() :: binary(). 14 | key_raw() :: atom() | string() | binary(). 15 | key() :: binary(). 16 | tag() :: binary().
17 | ``` 18 | 19 | | Avro | Encoder Input | Decoder Output | Notes | 20 | | ------- | ------------------------------- | ---------------------------- | --------------------------------------------- | 21 | | null | `null` | `null` | No implicit `undefined` transformation | 22 | | boolean | `boolean()` | `boolean()` | | 23 | | int | `integer()` | `integer()` | `-2147483648..2147483647` | 24 | | long | `integer()` | `integer()` | `-9223372036854775808..9223372036854775807` | 25 | | float | `integer() \| float()` | `float()` | | 26 | | double | `integer() \| float()` | `float()` | | 27 | | bytes | `binary()` | `binary()` | | 28 | | string | `iolist()` | `binary()` | | 29 | | enum | `name_raw()` | `name()` | | 30 | | fixed | `binary()` | `binary()` | | 31 | | array | `[in()]` | `[out()]` | | 32 | | map | `[{key_raw(), in()}] \| map()` | `[{key(), out()}] \| map()` | Decoder output depends on map_type option | 33 | | record | `[{name_raw(), in()}] \| map()` | `[{name(), out()}] \| map()` | Decoder output depends on record_type option | 34 | | union | `in() \| {tag(), in()}` | `out() \| {tag(), out()}` | See notes about unions below | 35 | 36 | Where `in()` and `out()` refer to the input and output type specs recursively. 37 | 38 | ## Important Notes about Unicode Strings 39 | 40 | The binary encoder/decoder will respect whatever is given in the input (bytes). 41 | i.e. The encoder will NOT try to be smart and encode the input `string()` to utf8 (or whatsoever), 42 | and the decoder will not try to validate or decode the input `binary()` as unicode character list. 43 | 44 | The encode caller should make sure the input is of spec `[byte()] | binary()`, 45 | NOT a unicode character list which may possibly contain some code points greater than 255. 46 | 47 | # Examples 48 | 49 | ## Load Avro Schema file(s) (demonstrating in Erlang shell) 50 | 51 | See `test/data/interop.avsc` for avro schema definition. 
52 | 53 | ```erlang 54 | 1> {ok, SchemaJSON} = file:read_file("test/data/interop.avsc"). 55 | 2> Term = hd(element(3, avro_ocf:decode_file("test/data/interop.ocf"))). 56 | [{"intField",12}, 57 | {"longField",15234324}, 58 | {"stringField","hey"}, 59 | {"boolField",true}, 60 | {"floatField",1234.0}, 61 | {"doubleField",-1234.0}, 62 | {"bytesField",<<"12312adf">>}, 63 | {"nullField",null}, 64 | {"arrayField",[5.0,0.0,12.0]}, 65 | {"mapField", 66 | [{"a",[{"label","a"}]},{"bee",[{"label","cee"}]}]}, 67 | {"unionField",12.0}, 68 | {"enumField","C"}, 69 | {"fixedField",<<"1019181716151413">>}, 70 | {"recordField", 71 | [{"label","blah"}, 72 | {"children",[[{"label","inner"},{"children",[]}]]}]}] 73 | 3> Encoder = avro:make_simple_encoder(SchemaJSON, []). 74 | 4> Decoder = avro:make_simple_decoder(SchemaJSON, []). 75 | 5> Encoded = iolist_to_binary(Encoder(Term)). 76 | 6> Term =:= Decoder(Encoded). 77 | true 78 | ``` 79 | 80 | ## Define avro schema using erlavro APIs 81 | 82 | ### Avro Binary Encode/Decode 83 | 84 | ```erlang 85 | MyRecordType = 86 | avro_record:type( 87 | <<"MyRecord">>, 88 | [avro_record:define_field(f1, int), 89 | avro_record:define_field(f2, string)], 90 | [{namespace, 'com.example'}]), 91 | Encoder = avro:make_simple_encoder(MyRecordType, []), 92 | Decoder = avro:make_simple_decoder(MyRecordType, []), 93 | Term = [{<<"f1">>, 1}, {<<"f2">>, <<"my string">>}], 94 | Bin = Encoder(Term), 95 | [{<<"f1">>, 1}, {<<"f2">>, <<"my string">>}] = Decoder(Bin), 96 | ok. 
97 | ``` 98 | 99 | ### Avro JSON Encode/Decode 100 | 101 | ```erlang 102 | MyRecordType = 103 | avro_record:type( 104 | "MyRecord", 105 | [avro_record:define_field("f1", int), 106 | avro_record:define_field("f2", string)], 107 | [{namespace, "com.example"}]), 108 | Encoder = avro:make_simple_encoder(MyRecordType, [{encoding, avro_json}]), 109 | Decoder = avro:make_simple_decoder(MyRecordType, [{encoding, avro_json}]), 110 | Term = [{<<"f1">>, 1}, {<<"f2">>, <<"my string">>}], 111 | JSON = Encoder(Term), 112 | Term = Decoder(JSON), 113 | io:put_chars(user, JSON), 114 | ok. 115 | ``` 116 | 117 | JSON to expect: 118 | 119 | ```json 120 | {"f1":1,"f2":"my string"} 121 | ``` 122 | 123 | ### Encoded Value as a Part of Parent Object 124 | 125 | ```erlang 126 | CodecOptions = [], %% [{encoding, avro_json}] for JSON encode/decode 127 | NullableInt = avro_union:type([null, int]), 128 | MyRecordType1 = 129 | avro_record:type( 130 | "MyRecord1", 131 | [avro_record:define_field("f1", NullableInt), 132 | avro_record:define_field("f2", string)], 133 | [{namespace, "com.example"}]), 134 | MyRecordType2 = 135 | avro_record:type( 136 | "MyRecord2", 137 | [avro_record:define_field("f1", string), 138 | avro_record:define_field("f2", NullableInt)], 139 | [{namespace, "com.example"}]), 140 | MyUnion = avro_union:type([MyRecordType1, MyRecordType2]), 141 | MyArray = avro_array:type(MyUnion), 142 | Lkup = fun(_) -> erlang:error("not expecting type lookup because " 143 | "all types are fully constructed. " 144 | "i.e. 
no name references") end, 145 | %% Encode Records with type info wrapped 146 | %% so they can be used as a drop-in part of wrapper object 147 | WrappedEncoder = avro:make_encoder(Lkup, [wrapped | CodecOptions]), 148 | T1 = [{"f1", null}, {"f2", <<"str1">>}], 149 | T2 = [{"f1", <<"str2">>}, {"f2", 2}], 150 | %% Encode the records with type info wrapped 151 | R1 = WrappedEncoder(MyRecordType1, T1), 152 | R2 = WrappedEncoder(MyRecordType2, T2), 153 | %% Tag the union values for better encoding performance 154 | U1 = {"com.example.MyRecord1", R1}, 155 | U2 = {"com.example.MyRecord2", R2}, 156 | %% This encoder returns iodata result without type info wrapped 157 | BinaryEncoder = avro:make_encoder(Lkup, CodecOptions), 158 | %% Construct the array from encoded elements 159 | Bin = iolist_to_binary(BinaryEncoder(MyArray, [U1, U2])), 160 | %% Tag the decoded values 161 | Hook = avro_decoder_hooks:tag_unions(), 162 | Decoder = avro:make_decoder(Lkup, [{hook, Hook} | CodecOptions]), 163 | [ {<<"com.example.MyRecord1">>, [{<<"f1">>, null}, {<<"f2">>, <<"str1">>}]} 164 | , {<<"com.example.MyRecord2">>, [{<<"f1">>, <<"str2">>}, {<<"f2">>, 2}]} 165 | ] = Decoder(MyArray, Bin), 166 | ok. 167 | ``` 168 | 169 | # Decoder Hooks 170 | 171 | Decoder hook is an anonymous function to be evaluated by the JSON or binary decoder to amend data before and/or after decoding. 172 | Some hook use cases for example: 173 | 174 | * Tag union value with type name. e.g. `avro_decoder_hooks:tag_unions/0`. 175 | * Apply `string_to_atom/1` on record field names or map keys. 176 | * Debugging. e.g. `avro_decoder_hooks:print_debug_trace/2` gives you a hook which can print decode history and stack upon failure. 177 | * For JSON decoder, fast-skip undesired data fields in records or keys in maps. 178 | * Monkey patching corrupted data. 
179 | 180 | The default decoder hook does nothing but pass the decode call through: 181 | 182 | ``` 183 | fun(__Type__, __SubNameOrId__, Data, DecodeFun) -> 184 | DecodeFun(Data) 185 | end 186 | ``` 187 | 188 | This is a typical way to implement a hook which actually does something 189 | 190 | ``` 191 | fun(Type, SubNameOrIndex, Data0, DecodeFun) -> 192 | Data = amend_data(Data0), 193 | Result = DecodeFun(Data), 194 | amend_result(Result) 195 | end 196 | ``` 197 | You can of course also splice-up two hooks by one wrapping around the other: 198 | 199 | ``` 200 | Hook1 = fun(T, S, D, F) -> ... end, 201 | fun(Type, SubNameOrIndex, Data0, DecodeFun) -> 202 | Data = amend_data(Data0), 203 | Result = Hook1(Type, SubNameOrIndex, Data, DecodeFun), 204 | amend_result(Result) 205 | end 206 | ``` 207 | 208 | Please find more details and a few examples in `avro_decoder_hooks.erl` 209 | 210 | # Important Notes About Unions 211 | 212 | ### Union Values Should be Tagged with Type Name for Better Encoding Performance 213 | 214 | For a big union like below 215 | 216 | ``` 217 | [ 218 | "com.example.MyRecord1", 219 | "com.example.MyRecord2", 220 | ... and many more ... 221 | ] 222 | ``` 223 | There are two ways to encode such unions 224 | 225 | * Untagged: `Encoder(UnionType, MyRecord)` where `MyRecord` is of spec `[{field_name(), field_value()}]` 226 | * Tagged: `Encoder(UnionType, MyRecord)` where `MyRecord` is of spec `{"com.example.MyRecordX", [{field_name(), field_value()}]}` 227 | 228 | For `Untagged`, the encoder will have to TRY to encode using the union member types one after another until success. 229 | This is completely fine for small unions (e.g. a union of `null` and `long`), however quite expensive (and sometimes can be problematic) for records. 230 | Therefore we are recommending the `Tagged` way, because it'll help the encoder to find the member quickly. 231 | 232 | ### Caution when unioning string type and int/long arrays.
233 | 234 | As `[integer()]` list is `string()` in Erlang, this will confuse the encoder. 235 | Please make sure to use `binary()` as avro string encoding input or tag it, 236 | and always tag int/long array value like `{array, [1, 2, 3]}`. 237 | 238 | ### Union Values Are Decoded Without Tags by Default 239 | 240 | A bit contradicting to the recommended union encoding, the decoded values are NOT tagged by DEFAULT. 241 | Because we believe the use case of tagged unions in decoder output is not as common. 242 | You may use the decoder hook `avro_decoder_hooks:tag_unions/0` to have the decoded values tagged. 243 | NOTE: only named complex types are tagged by this hook, you can of course write your own hook for a different tagging behaviour. 244 | 245 | # Object container file encoding/decoding 246 | 247 | See `avro_ocf.erl` for details 248 | 249 | # Logical types and custom type properties. 250 | 251 | NOTE: There is no logical type or custom type properties based on avro 'union' type. 252 | 253 | `erlavro` encodes/decodes logical types as well as custom type properties, 254 | but (so far) does not validate or transform the encoder/decoder input/output. 255 | 256 | e.g. The underlying data type of 'Date' logical type is 'int', in a perfect world, 257 | the encoder should accept `{Y, M, D}` as input and the decoder should transform the integer 258 | back to `{Y, M, D}` --- but this is not supported so far. 259 | 260 | Call `avro:get_custom_props/2` to access logical type info (as well as any extra customized type properties) 261 | for extra data validation/transformation at application level.
262 | 263 | -------------------------------------------------------------------------------- /src/avro_union.erl: -------------------------------------------------------------------------------- 1 | %%%----------------------------------------------------------------------------- 2 | %%% 3 | %%% Copyright (c) 2013-2018 Klarna AB 4 | %%% 5 | %%% This file is provided to you under the Apache License, 6 | %%% Version 2.0 (the "License"); you may not use this file 7 | %%% except in compliance with the License. You may obtain 8 | %%% a copy of the License at 9 | %%% 10 | %%% http://www.apache.org/licenses/LICENSE-2.0 11 | %%% 12 | %%% Unless required by applicable law or agreed to in writing, 13 | %%% software distributed under the License is distributed on an 14 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %%% KIND, either express or implied. See the License for the 16 | %%% specific language governing permissions and limitations 17 | %%% under the License. 18 | %%% 19 | %%% @author Ilya Staheev 20 | %%% @doc Implements unions support for Avro. 21 | %%% 22 | %%% Unions may not contain more than one schema with the same type, except 23 | %%% for the named types record, fixed and enum. For example, unions containing 24 | %%% two array types or two map types are not permitted, but two types with 25 | %%% different names are permitted. (Names permit efficient resolution when 26 | %%% reading and writing unions.) 27 | %%% 28 | %%% Unions may not immediately contain other unions. 29 | %%% @end 30 | %%%----------------------------------------------------------------------------- 31 | 32 | -module(avro_union). 33 | 34 | %% API 35 | -export([ cast/2 36 | , encode/3 37 | , get_child_type_index/1 38 | , get_types/1 39 | , get_value/1 40 | , lookup_type/2 41 | , new/2 42 | , resolve_fullname/2 43 | , to_term/1 44 | , type/1 45 | , update_member_types/2 46 | ]). 47 | 48 | %% API functions which should be used only inside erlavro 49 | -export([new_direct/2]). 
-export_type([ id2type/0
             , name2id/0
             ]).

-include("avro_internal.hrl").

-type id2type() :: #{union_index() => type_or_name()}.
-type name2id() :: #{name() => {union_index(), boolean()}}.
-type encode_result() :: avro_binary() | avro_json().
-type encode_fun() :: fun((avro_type(), avro:in(),
                           union_index()) -> encode_result()).

%%%_* APIs =====================================================================

%% @doc Define a union type from a list of member types (or type names).
%% Members are indexed from 0 in the given order.
%% An exception is raised when either constraint below is violated:
%% 1. A union must not have another union as a direct member
%% 2. No two members may have the same (full) name
%% @end
-spec type([type_or_name()]) -> union_type() | no_return().
type([]) ->
  #avro_union_type{id2type = #{}, name2id = #{}};
type([_ | _] = Members0) ->
  case lists:any(fun(M) -> ?IS_UNION_TYPE(M) end, Members0) of
    true  -> erlang:error(<<"union should not have union as member">>);
    false -> ok
  end,
  Members = [avro_util:canonicalize_type_or_name(M) || M <- Members0],
  LastIndex = length(Members) - 1,
  Indexed = lists:zip(lists:seq(0, LastIndex), Members),
  NameIndex = build_name_to_id(Indexed),
  ok = assert_no_duplicated_names(NameIndex, []),
  #avro_union_type
    { id2type = maps:from_list(Indexed)
    , name2id = maps:from_list(NameIndex)
    }.

%% @doc Resolve the full names of all members, given the newly
%% discovered enclosing namespace.
%% @end
-spec resolve_fullname(union_type(), namespace()) -> union_type().
resolve_fullname(Union, Ns) ->
  update_member_types(Union,
                      fun(Member) -> avro:resolve_fullname(Member, Ns) end).

%% @doc Update member types by evaluating callback function.
-spec update_member_types(union_type(),
                          fun((type_or_name()) -> type_or_name())) ->
        union_type().
update_member_types(T0, F) ->
  %% the mapped members go through type/1 again, so the result is a
  %% freshly built union type
  type(lists:map(F, get_types(T0))).

%% @doc Get the union member types as a list ordered by member index.
-spec get_types(union_type()) -> [avro_type()].
get_types(#avro_union_type{id2type = Id2Type}) ->
  ByIndex = lists:keysort(1, maps:to_list(Id2Type)),
  [Member || {_Index, Member} <- ByIndex].

%% @doc Search for a union member by its index or full name.
%% Returns 'false' when there is no such member.
%% @end
-spec lookup_type(name_raw() | union_index(), union_type()) ->
        {ok, avro_type() | name()} | false.
lookup_type(Id, #avro_union_type{id2type = Id2Type}) when is_integer(Id) ->
  case maps:find(Id, Id2Type) of
    {ok, Member} -> {ok, Member};
    error        -> false
  end;
lookup_type(Name, Union) when ?IS_NAME(Name) ->
  case lookup_index(Name, Union) of
    false ->
      false;
    {ok, {_Id, true}} ->
      %% self-reference: the name itself identifies the type
      {ok, avro:name2type(Name)};
    {ok, {Id, false}} ->
      {ok, _} = lookup_type(Id, Union)
  end;
lookup_type(Name, Union) when ?IS_NAME_RAW(Name) ->
  lookup_type(?NAME(Name), Union).

%% @doc Get the index of the member type held by a union value.
-spec get_child_type_index(avro_value()) -> union_index().
get_child_type_index(Union) when ?IS_UNION_VALUE(Union) ->
  MemberName = get_child_type_name(Union),
  {ok, {Index, _IsSelfRef}} =
    lookup_index(MemberName, ?AVRO_VALUE_TYPE(Union)),
  Index.

%% @doc Get the full type name of the typed member held by a union value.
%% This is used to encode wrapped (boxed) value to JSON format,
%% where member type full name instead of member index is used.
%% @end
-spec get_child_type_name(avro_value()) -> fullname().
get_child_type_name(Union) when ?IS_UNION_VALUE(Union) ->
  Member = ?AVRO_VALUE_DATA(Union),
  MemberType = ?AVRO_VALUE_TYPE(Member),
  avro:get_type_fullname(MemberType).

%% @doc Create a wrapped (boxed) value.
-spec new(union_type(), avro:in()) -> avro_value() | no_return().
new(Type, Value) when ?IS_UNION_TYPE(Type) ->
  %% a failed cast is turned into an exception carrying the cast error
  case cast(Type, Value) of
    {error, Reason} -> erlang:error(Reason);
    {ok, Boxed}     -> Boxed
  end.

%% @hidden Special optimized version of new which assumes that Value
%% is already casted to one of the union types. Should only
%% be used inside erlavro.
%% @end
new_direct(Type, Value) when ?IS_UNION_TYPE(Type) ->
  ?AVRO_VALUE(Type, Value).

%% @doc Get the value currently held by a union value.
get_value(Union) when ?IS_UNION_VALUE(Union) ->
  ?AVRO_VALUE_DATA(Union).

%% @doc Encode logic shared by the JSON and the binary encoder.
%% The actual encoding is done by EncodeFun, which is called with the
%% member type, the (untagged) value and the member index.
%% The input may be tagged with a member index, tagged with a member name,
%% or untagged, in which case the members are tried one after another.
%% @end
-spec encode(type_or_name(), avro:in(), encode_fun()) ->
        encode_result() | no_return().
encode(Type, {MemberId, Value}, EncodeFun) when is_integer(MemberId) ->
  case lookup_type(MemberId, Type) of
    false ->
      erlang:error({unknown_member, Type, MemberId});
    {ok, MemberType} ->
      EncodeFun(MemberType, Value, MemberId)
  end;
encode(Type, {MemberName, Value}, EncodeFun) when ?IS_NAME_RAW(MemberName) ->
  case lookup_index(MemberName, Type) of
    false ->
      erlang:error({unknown_member, Type, MemberName});
    {ok, {MemberId, true}} ->
      %% self-reference, no id2type lookup needed
      EncodeFun(avro:name2type(MemberName), Value, MemberId);
    {ok, {MemberId, false}} ->
      {ok, MemberType} = lookup_type(MemberId, Type),
      EncodeFun(MemberType, Value, MemberId)
  end;
encode(Type, Value, EncodeFun) ->
  %% untagged input: try the members in index order, starting from 0
  try_encode_union_loop(Type, avro_union:get_types(Type), Value,
                        0, EncodeFun).

%% @hidden Note: in some cases casting to an union type can be ambiguous, for
%% example when it contains both string and enum types.
%% In such cases it is recommended to explicitly specify types for
%% values, or not use such combinations of types at all.
%% @end
-spec cast(union_type(), {name(), avro_value()} | avro:in()) ->
        {ok, avro_value()} | {error, any()}.
cast(Type, Value) when ?IS_UNION_TYPE(Type) ->
  do_cast(Type, Value).

%% @doc Recursively unbox typed value.
-spec to_term(avro_value()) -> term().
to_term(Union) when ?IS_UNION_VALUE(Union) ->
  Boxed = ?AVRO_VALUE_DATA(Union),
  avro:to_term(Boxed).

%%%_* Internal functions =======================================================

%% @private Build the member type name to member index mapping.
%% Each entry maps the member name to its id and an 'IsSelfRef' boolean tag.
%% When the tag is set to true, there is no need to lookup the id2type
%% mapping for type because:
%% 1. when it's primitive type, simply call avro:name2type would be
%%    faster than another lookup
%% 2. when it's a remote reference to the named member type, the lookup
%%    result would be the name itself
%% @end
-spec build_name_to_id([{union_index(), type_or_name()}]) ->
        [{name(), {union_index(), IsSelfRef :: boolean()}}].
build_name_to_id(IndexedTypes) ->
  lists:map(fun name_to_id_entry/1, IndexedTypes).

%% @private Make the name-to-id entry of one indexed member.
-spec name_to_id_entry({union_index(), type_or_name()}) ->
        {name(), {union_index(), IsSelfRef :: boolean()}}.
name_to_id_entry({Id, FullName}) when ?IS_NAME(FullName) ->
  %% a full name reference to a member type
  {FullName, {Id, true}};
name_to_id_entry({Id, Type}) ->
  {avro:get_type_fullname(Type), {Id, ?IS_PRIMITIVE_TYPE(Type)}}.

%% @private
-spec try_encode_union_loop(union_type(), [avro_type()], avro:in(),
                            union_index(), encode_fun()) ->
        encode_result() | no_return().
try_encode_union_loop(UnionType, [], Value, _Index, _EncodeFun) ->
  %% every member was tried and none could encode the value
  erlang:error({failed_to_encode_union, UnionType, Value});
try_encode_union_loop(UnionType, [Candidate | Others], Value,
                      Index, EncodeFun) ->
  try EncodeFun(Candidate, Value, Index) of
    Encoded -> Encoded
  catch
    _Class : _Reason ->
      %% any failure means this member does not fit, try the next one
      try_encode_union_loop(UnionType, Others, Value, Index + 1, EncodeFun)
  end.

%% @private Lookup union member index by union member name.
%% Returns 'false' when the name is not a member of the union.
%% @end
-spec lookup_index(name(), union_type()) ->
        {ok, {union_index(), boolean()}} | false.
lookup_index(Name, #avro_union_type{name2id = Ids}) when ?IS_NAME_RAW(Name) ->
  case maps:find(?NAME(Name), Ids) of
    {ok, IdAndTag} -> {ok, IdAndTag};
    error          -> false
  end.

%% @private Cast the input to a union value. A two-element tuple is taken
%% as a value tagged with a union member name or id, anything else is
%% tried against the member types one by one.
%% @end
-spec do_cast(union_type(), {name(), avro_value()} | avro:in()) ->
        {ok, avro_value()} | {error, any()}.
do_cast(Type, {MemberId, Value}) ->
  case lookup_type(MemberId, Type) of
    false ->
      {error, {unknown_member, Type, MemberId}};
    {ok, MemberType} ->
      {ok, Boxed} = avro:cast(MemberType, Value),
      {ok, ?AVRO_VALUE(Type, Boxed)}
  end;
do_cast(Type, Value) ->
  case cast_over_types(get_types(Type), Value) of
    {ok, Boxed} -> {ok, ?AVRO_VALUE(Type, Boxed)};
    Failure     -> Failure
  end.

%% @private Return the result of the first member type the value can be
%% casted to, or {error, type_mismatch} when there is none.
%% @end
-spec cast_over_types([avro_type()], avro:in()) ->
        {ok, avro_value()} | {error, term()}.
cast_over_types([], _Value) ->
  {error, type_mismatch};
cast_over_types([Candidate | Others], Value) ->
  case avro:cast(Candidate, Value) of
    {error, _} -> cast_over_types(Others, Value);
    Found      -> Found
  end.

-spec assert_no_duplicated_names([{name(), union_index()}], [name()]) ->
        ok | no_return().
276 | assert_no_duplicated_names([], _UniqueNames) -> ok; 277 | assert_no_duplicated_names([{Name, _Index} | Rest], UniqueNames) -> 278 | case lists:member(Name, UniqueNames) of 279 | true -> erlang:error({<<"duplicated union member">>, Name}); 280 | false -> assert_no_duplicated_names(Rest, [Name | UniqueNames]) 281 | end. 282 | 283 | %%%_* Emacs ==================================================================== 284 | %%% Local Variables: 285 | %%% allout-layout: t 286 | %%% erlang-indent-level: 2 287 | %%% End: 288 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /src/avro_schema_store.erl: -------------------------------------------------------------------------------- 1 | %%%----------------------------------------------------------------------------- 2 | %%% Copyright (c) 2013-2018 Klarna AB 3 | %%% 4 | %%% This file is provided to you under the Apache License, 5 | %%% Version 2.0 (the "License"); you may not use this file 6 | %%% except in compliance with the License. You may obtain 7 | %%% a copy of the License at 8 | %%% 9 | %%% http://www.apache.org/licenses/LICENSE-2.0 10 | %%% 11 | %%% Unless required by applicable law or agreed to in writing, 12 | %%% software distributed under the License is distributed on an 13 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | %%% KIND, either express or implied. See the License for the 15 | %%% specific language governing permissions and limitations 16 | %%% under the License. 17 | %%% 18 | %%% @author Ilya Staheev 19 | %%% @doc Stores all types in the schema. 
%%%
%%% The module allows accessing all types in a uniform way by using
%%% their full names. After a type has been successfully placed into
%%% the store, all its name parts are resolved so that no further
%%% actions to work with names are needed.
%%%
%%% When a type is added to the store, all its child named types are
%%% extracted and stored as separate types as well. Their placeholders
%%% in the original type are replaced by their full names. The types
%%% themselves keep their full names in the 'name' field and empty
%%% strings in 'namespace'.
%%%
%%% An error is raised when a name conflict is detected during type
%%% addition.
%%% @end
%%%-----------------------------------------------------------------------------

-module(avro_schema_store).

%% Init/Terminate
-export([ new/0
        , new/1
        , new/2
        , close/1
        , is_store/1
        ]).

%% Import
-export([ import_file/2
        , import_files/2
        , import_schema_json/2
        , import_schema_json/3
        ]).

%% Add/Lookup
-export([ add_type/2
        , add_type/3
        , lookup_type/2
        , to_lookup_fun/1
        , get_all_types/1
        , ensure_store/1
        ]).

-include("avro_internal.hrl").

%% A store is backed by an ets table, a `{dict, Dict}' tuple or a plain map.
-type store() :: ets:tab() | {dict, dict:dict()} | map().
%% Keys accepted in the option list given to new/1.
-type option_key() :: access | name | dict | map.
-type options() :: [option_key() | {option_key(), term()}].
-type filename() :: file:filename_all().

-export_type([store/0]).

%%%_* APIs =====================================================================

%% @doc Create a store with default options.
%% @equiv new([])
-spec new() -> store().
new() -> new([]).

%% @doc Create a new ets table to store avro types.
%% Options:
%%   * `{access, public|protected|private}' - has same meaning as access
%%     mode in ets:new and defines what processes can have access to
%%   * `{name, atom()}' - used to create a named ets table.
%%   * `dict' - use dict as store backend, ignore `access' and `name' options
%%   * `map' - use map as store backend, ignore `access' and `name' options
%% @end
-spec new(options()) -> store().
new(Options) ->
  %% `dict' takes precedence over `map'; ets is the fallback backend.
  WantDict = proplists:get_bool(dict, Options),
  WantMap = (not WantDict) andalso proplists:get_bool(map, Options),
  if
    WantDict -> {dict, dict:new()};
    WantMap  -> #{};
    true     -> new_ets(Options)
  end.

%% @doc Create a new schema store and import the given schema JSON files.
-spec new([proplists:property()], [filename()]) -> store().
new(Options, Files) ->
  import_files(Files, new(Options)).

%% @doc Return true if the given term has the shape of a schema store:
%% a `{dict, _}' tuple, a map, or something usable as an ets table
%% identifier (integer, atom or reference).
%% @end
-spec is_store(term()) -> boolean().
is_store({dict, _Dict}) -> true;
is_store(Term) when is_map(Term) -> true;
is_store(Term) when is_integer(Term); is_atom(Term); is_reference(Term) -> true;
is_store(_Other) -> false.

%% @doc Make a schema lookup function from store.
%% The returned fun crashes with `badmatch' when the name is not found.
%% @end
-spec to_lookup_fun(store()) -> fun((name_raw()) -> avro_type()).
to_lookup_fun(Store) ->
  fun(TypeName) ->
    {ok, Found} = ?MODULE:lookup_type(TypeName, Store),
    Found
  end.

%% @doc Import avro JSON files into schema store, one after another.
-spec import_files([filename()], store()) -> store().
import_files(Files, Store) ->
  %% import_file/2 already has the (Element, Accumulator) argument order.
  lists:foldl(fun import_file/2, Store, Files).

%% @doc Import avro JSON file into schema store.
%% In case the schema is unnamed, the file basename is used as its
%% lookup name.
%% Extension ".avsc" or ".json" will be stripped,
%% otherwise the full file basename is used.
%% e.g.
%%   "/path/to/com.klarna.test.x.avsc" to "com.klarna.test.x"
%%   "/path/to/com.klarna.test.x.json" to "com.klarna.test.x"
%%   "/path/to/com.klarna.test.x"      to "com.klarna.test.x"
%% @end
-spec import_file(filename(), store()) -> store().
import_file(File, Store) ->
  case file:read_file(File) of
    {error, Reason} ->
      erlang:error({failed_to_read_schema_file, File, Reason});
    {ok, SchemaJson} ->
      %% The basename doubles as the assigned lookup name of the schema.
      import_schema_json(parse_basename(File), SchemaJson, Store)
  end.

%% @doc Decode avro schema JSON and add the result to the store without an
%% assigned name.
%% NOTE: Exception if the type is unnamed.
%% @end
-spec import_schema_json(binary(), store()) -> store().
import_schema_json(Json, Store) ->
  import_schema_json(undefined, Json, Store).

%% @doc Close the store. Deletes the ets table for ets backed stores;
%% dict and map backed stores need no cleanup.
%% @end
-spec close(store()) -> ok.
close(Store) when is_map(Store) -> ok;
close({dict, _Dict}) -> ok;
close(EtsTable) ->
  ets:delete(EtsTable),
  ok.

%% @doc Assert that the argument is a store and return it unchanged.
%% Exists to make dialyzer happy.
%% @end
-spec ensure_store(atom() | integer() | reference() | store()) ->
        store().
ensure_store(MaybeStore) ->
  true = is_store(MaybeStore),
  MaybeStore.

%% @doc Add named type into the schema store.
%% NOTE: the type is flattened before inserting into the schema store.
%% i.e. named types nested in the given type are lifted up to root level.
%% @end
-spec add_type(avro_type(), store()) -> store().
add_type(Type, Store) ->
  add_type(undefined, Type, Store).

%% @doc Add (maybe unnamed) type to schema store.
%% If the type is unnamed, the assigned name is used.
%% For named types, the assigned name works like an alias.
%% @end
-spec add_type(undefined | name_raw(), avro_type(), store()) -> store().
add_type(AssignedName, Type0, Store) ->
  {RootType, NamedTypes} = avro:flatten_type(Type0),
  %% A root that is still a type record after flattening is unnamed, so it
  %% can only be stored under an assigned name; refuse when none is given.
  IsAnonymous = ?IS_TYPE_RECORD(RootType) andalso AssignedName =:= undefined,
  IsAnonymous andalso erlang:error({unnamed_type, RootType}),
  %% Register the root under the assigned name, even when the flattened
  %% result is just a name reference.
  Store1 = add_by_assigned_name(AssignedName, RootType, Store),
  lists:foldl(fun do_add_type/2, Store1, NamedTypes).

%% @doc Lookup a type using its full name (or an alias / assigned name).
%% Returns `false' when nothing is stored under the name.
%% @end
-spec lookup_type(name_raw(), store()) -> {ok, avro_type()} | false.
lookup_type(FullName, Store) ->
  get_type_from_store(?NAME(FullName), Store).

%% @doc Get all schema types, sorted and without duplicates.
%% Entries that only hold a name reference (aliases) are left out.
%% @end
-spec get_all_types(store()) -> [avro_type()].
get_all_types(Store) ->
  lists:usort([T || {_Key, T} <- to_list(Store), ?IS_TYPE_RECORD(T)]).

%%%_* Internal Functions =======================================================

%% Dump all `{Name, TypeOrName}' entries of the store, whatever the backend.
-spec to_list(store()) -> [{name(), avro_type()}].
to_list(Map) when is_map(Map) -> maps:to_list(Map);
to_list({dict, Dict}) -> dict:to_list(Dict);
to_list(EtsTable) -> ets:tab2list(EtsTable).

%% Create the ets backend. The table is only registered as a named table
%% when the `name' option is given; access mode defaults to `public'.
-spec new_ets(options()) -> store().
new_ets(Options) ->
  Access = avro_util:get_opt(access, Options, public),
  BaseOpts = [Access, {read_concurrency, true}],
  case avro_util:get_opt(name, Options, undefined) of
    undefined -> ets:new(?MODULE, BaseOpts);
    TableName -> ets:new(TableName, BaseOpts ++ [named_table])
  end.

%% @private Add type by an assigned name.
%% Except when assigned name is 'undefined'
%% @end
-spec add_by_assigned_name(undefined | name_raw(),
                           type_or_name(), store()) -> store().
add_by_assigned_name(undefined, _TypeOrName, Store) ->
  Store;
add_by_assigned_name(AssignedName, TypeOrName, Store) ->
  add_type_by_name(?NAME(AssignedName), TypeOrName, Store).

%% @private Parse file basename. Try to strip ".avsc" or ".json" extension.
%% The shortest of the three candidate basenames wins, i.e. the one whose
%% extension was actually stripped (or the plain basename if none was).
%% @end
-spec parse_basename(filename()) -> name().
parse_basename(FileName) ->
  Candidates = [ filename:basename(FileName)
               , filename:basename(FileName, ".avsc")
               , filename:basename(FileName, ".json")
               ],
  Sized = [ begin
              Bin = avro_util:ensure_binary(Candidate),
              {byte_size(Bin), Bin}
            end || Candidate <- Candidates ],
  %% keysort is stable, so on equal length the earlier candidate is kept.
  [{_Len, Shortest} | _] = lists:keysort(1, Sized),
  Shortest.

%% @private Import JSON schema with assigned name.
-spec import_schema_json(name_raw(), binary(), store()) -> store().
import_schema_json(AssignedName, Json, Store) ->
  add_type(AssignedName, avro:decode_schema(Json), Store).

%% @private Store a named type under its full name, then register each of
%% its aliases as a reference to that full name.
%% @end
-spec do_add_type(avro_type(), store()) -> store().
do_add_type(Type, Store) ->
  FullName = avro:get_type_fullname(Type),
  Aliases = avro:get_aliases(Type),
  add_aliases(Aliases, FullName, add_type_by_name(FullName, Type, Store)).

%% Aliases are stored as `Alias -> FullName' entries (not as type copies).
add_aliases(Aliases, FullName, Store) ->
  lists:foldl(fun(Alias, Acc) -> put_type_to_store(Alias, FullName, Acc) end,
              Store, Aliases).

%% @private Insert the type unless the name is taken. Re-adding the very
%% same type is a no-op; a different type under the same name is an error.
%% @end
-spec add_type_by_name(fullname(), avro_type(), store()) ->
        store() | no_return().
add_type_by_name(Name, Type, Store) ->
  case get_type_from_store(Name, Store) of
    false ->
      put_type_to_store(Name, Type, Store);
    {ok, Existing} when Existing =:= Type ->
      Store;
    {ok, Existing} ->
      %% Name may be an assigned name of an unnamed type, hence the error
      %% carries the name AND both the new and the old type.
      erlang:error({name_clash, Name, Type, Existing})
  end.

%% @private Write (or overwrite) one entry in whichever backend is in use.
-spec put_type_to_store(fullname(), name() | avro_type(), store()) -> store().
put_type_to_store(Name, Type, {dict, Dict}) ->
  {dict, dict:store(Name, Type, Dict)};
put_type_to_store(Name, Type, Map) when is_map(Map) ->
  maps:put(Name, Type, Map);
put_type_to_store(Name, Type, EtsTable) ->
  true = ets:insert(EtsTable, {Name, Type}),
  EtsTable.

%% @private Get type by name or alias.
%% A binary entry is a reference to a full name and is followed once.
%% @end
-spec get_type_from_store(fullname(), store()) -> false | {ok, avro_type()}.
get_type_from_store(NameRef, Store) ->
  case do_get_type_from_store(NameRef, Store) of
    {ok, Target} when is_binary(Target) ->
      do_get_type_from_store(Target, Store);
    Result ->
      %% either `false' or `{ok, Type}'
      Result
  end.

%% @private Raw single-step lookup in the backend.
-spec do_get_type_from_store(fullname(), store()) ->
        false | {ok, fullname() | avro_type()}.
do_get_type_from_store(Name, {dict, Dict}) ->
  found_or_false(dict:find(Name, Dict));
do_get_type_from_store(Name, Map) when is_map(Map) ->
  found_or_false(maps:find(Name, Map));
do_get_type_from_store(Name, EtsTable) ->
  case ets:lookup(EtsTable, Name) of
    [{Name, Entry}] -> {ok, Entry};
    []              -> false
  end.

%% Translate the `{ok, V} | error' convention of dict/maps into the
%% `{ok, V} | false' convention used by this module.
found_or_false({ok, _} = Found) -> Found;
found_or_false(error)           -> false.
321 | 322 | %%%_* Emacs ==================================================================== 323 | %%% Local Variables: 324 | %%% allout-layout: t 325 | %%% erlang-indent-level: 2 326 | %%% End: 327 | -------------------------------------------------------------------------------- /src/avro_ocf.erl: -------------------------------------------------------------------------------- 1 | %% coding: latin-1 2 | %%%----------------------------------------------------------------------------- 3 | %%% Copyright (c) 2016-2018 Klarna AB 4 | %%% 5 | %%% This file is provided to you under the Apache License, 6 | %%% Version 2.0 (the "License"); you may not use this file 7 | %%% except in compliance with the License. You may obtain 8 | %%% a copy of the License at 9 | %%% 10 | %%% http://www.apache.org/licenses/LICENSE-2.0 11 | %%% 12 | %%% Unless required by applicable law or agreed to in writing, 13 | %%% software distributed under the License is distributed on an 14 | %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | %%% KIND, either express or implied. See the License for the 16 | %%% specific language governing permissions and limitations 17 | %%% under the License. 18 | %%% 19 | %%% @doc 20 | %%% Encode/decode avro object container files 21 | %%% @end 22 | %%%----------------------------------------------------------------------------- 23 | 24 | -module(avro_ocf). 25 | 26 | -export([ append_file/3 27 | , append_file/5 28 | , decode_binary/1 29 | , decode_binary/2 30 | , decode_file/1 31 | , decode_file/2 32 | , encode_header/1 33 | , make_block/2 34 | , make_header/1 35 | , make_header/2 36 | , make_ocf/2 37 | , write_header/2 38 | , write_file/4 39 | , write_file/5 40 | ]). 41 | 42 | -export_type([ header/0 43 | , meta/0 44 | ]). 45 | 46 | -include("avro_internal.hrl"). 47 | 48 | -type filename() :: file:filename_all(). 49 | -type meta() :: [{string() | binary(), binary()}]. 50 | -type lkup() :: schema_store() | lkup_fun(). 
%% In-memory form of the ocf file header.
-record(header, { magic
                , meta
                , sync
                }).

-opaque header() :: #header{}.

%% Tested in OTP-21, dialyzer had trouble understanding the second arg
%% for the call to write_header/2.
-dialyzer({nowarn_function, [write_file/4, write_file/5]}).

%%%_* APIs =====================================================================

%% @doc Same as decode_file/2, using default decoder options.
-spec decode_file(filename()) -> {header(), avro_type(), [avro:out()]}.
decode_file(Filename) ->
  DefaultOptions = avro:make_decoder_options([]),
  decode_file(Filename, DefaultOptions).

%% @doc Read the whole ocf file and decode it into unwrapped values.
-spec decode_file(filename(), decoder_options()) ->
        {header(), avro_type(), [avro:out()]}.
decode_file(Filename, Options) ->
  {ok, Contents} = file:read_file(Filename),
  decode_binary(Contents, Options).

%% @doc Same as decode_binary/2, using default decoder options.
-spec decode_binary(binary()) -> {header(), avro_type(), [avro:out()]}.
decode_binary(Bin) ->
  DefaultOptions = avro:make_decoder_options([]),
  decode_binary(Bin, DefaultOptions).

%% @doc Decode ocf binary into unwrapped values.
%% Returns the file header, the writer schema found in the header metadata
%% and the decoded objects of all data blocks.
%% @end
-spec decode_binary(binary(), decoder_options()) ->
        {header(), avro_type(), [avro:out()]}.
decode_binary(Bin, Options) ->
  {[ {<<"magic">>, Magic}
   , {<<"meta">>, Meta}
   , {<<"sync">>, Sync}
   ], Blocks} = decode_stream(ocf_schema(), Bin),
  {_Key, SchemaJson} = lists:keyfind(<<"avro.schema">>, 1, Meta),
  Codec = get_codec(Meta),
  %% Ignore bad defaults because ocf schema should never need defaults
  Schema = avro:decode_schema(SchemaJson, [ignore_bad_default_values]),
  Lkup = avro:make_lkup_fun("_erlavro_ocf_root", Schema),
  Header = #header{magic = Magic, meta = Meta, sync = Sync},
  Objects = decode_blocks(Lkup, Schema, Codec, Sync, Blocks, [], Options),
  {Header, Schema, Objects}.
%% @doc Write objects in a single block to the given file name,
%% without any extra metadata.
%% @end
-spec write_file(filename(), lkup(), type_or_name(), [avro:in()]) -> ok.
write_file(Filename, Lkup, Schema, Objects) ->
  write_file(Filename, Lkup, Schema, Objects, _Meta = []).

%% @doc Write objects in a single block to the given file name with custom
%% metadata. @see make_header/2 for details about use of meta data.
%% @end
-spec write_file(filename(), lkup(), type_or_name(), [avro:in()], meta()) -> ok.
write_file(Filename, Lkup, Schema, Objects, Meta) ->
  %% Build (and thereby validate) the header before touching the file.
  Header = make_header(Schema, Meta),
  {ok, Fd} = file:open(Filename, [write]),
  try
    ok = write_header(Fd, Header),
    ok = append_file(Fd, Header, Lkup, Schema, Objects)
  after
    %% Always release the descriptor, also when encoding/writing crashed.
    file:close(Fd)
  end.

%% @doc Write header bytes to an ocf file.
-spec write_header(file:io_device(), header()) -> ok.
write_header(Fd, Header) ->
  ok = file:write(Fd, encode_header(Header)).

%% @doc Append already encoded objects to the file as one data block.
-spec append_file(file:io_device(), header(), [binary()]) -> ok.
append_file(Fd, Header, Objects) ->
  ok = file:write(Fd, make_block(Header, Objects)).

%% @doc Encode the given objects and append to the file as one data block.
-spec append_file(file:io_device(), header(), lkup(),
                  type_or_name(), [avro:in()]) -> ok.
append_file(Fd, Header, Lkup, Schema, Objects) ->
  Encode = fun(Object) -> avro:encode(Lkup, Schema, Object, avro_binary) end,
  append_file(Fd, Header, lists:map(Encode, Objects)).

%% @doc Make ocf header without extra metadata.
-spec make_header(avro_type()) -> header().
make_header(Type) ->
  make_header(Type, []).

%% @doc Make ocf header, and append the given metadata fields.
%% You can use `>>"avro.codec"<<' metadata field to choose what data
%% block coding should be used. Supported values are `>>"null"<<'
%% (default), `>>"deflate"<<' (compressed) and
%% `>>"snappy"<<' (compressed). Other values in `avro' namespace
%% are reserved for internal use and can't be set. Other than that you are
%% free to provide any custom metadata.
%% @end
-spec make_header(avro_type(), meta()) -> header().
make_header(Type, Meta0) ->
  Validated = validate_meta(Meta0),
  %% Make the codec explicit: fall back to "null" when the caller gave none.
  WithCodec =
    case lists:keymember(<<"avro.codec">>, 1, Validated) of
      true  -> Validated;
      false -> [{<<"avro.codec">>, <<"null">>} | Validated]
    end,
  SchemaJson = avro_json_encoder:encode_type(Type),
  SchemaEntry = {<<"avro.schema">>, iolist_to_binary(SchemaJson)},
  #header{ magic = <<"Obj", 1>>
         , meta  = [SchemaEntry | WithCodec]
         , sync  = generate_sync_bytes()
         }.

%% @doc Build a complete ocf: the encoded header followed by one data block
%% holding the given (already encoded) objects.
%% @end
-spec make_ocf(header(), [binary()]) -> iodata().
make_ocf(Header, Objects) ->
  [encode_header(Header), make_block(Header, Objects)].

%% @doc Encode the given ocf header.
-spec encode_header(header()) -> iodata().
encode_header(Header) ->
  Fields = [ {"magic", Header#header.magic}
           , {"meta",  Header#header.meta}
           , {"sync",  Header#header.sync}
           ],
  avro_binary_encoder:encode_value(avro_record:new(ocf_schema(), Fields)).

%% @doc Encode the given objects as one data block.
-spec make_block(header(), [binary()]) -> iodata().
make_block(Header, Objects) ->
  ObjectCount = length(Objects),
  Payload = encode_block(Header#header.meta, Objects),
  EncodeLong =
    fun(N) -> avro_binary_encoder:encode_value(avro_primitive:long(N)) end,
  %% Block layout: object count, payload size in bytes, payload, sync marker.
  [ EncodeLong(ObjectCount)
  , EncodeLong(byte_size(Payload))
  , Payload
  , Header#header.sync
  ].

%%%_* Internal functions =======================================================

%% Raise an exception if meta has a bad format.
%% Otherwise return the metadata entries with their keys turned into binaries.
-spec validate_meta(meta()) -> meta() | no_return().
validate_meta(Meta) ->
  validate_meta(Meta, []).

%% Entries are checked in order; the first offending entry raises.
validate_meta([], Checked) ->
  lists:reverse(Checked);
validate_meta([{Key0, Value} | Rest], Checked) ->
  Key = iolist_to_binary(Key0),
  case { is_reserved_meta_key(Key)
       , is_invalid_codec_meta(Key, Value)
       , is_binary(Value)
       } of
    {true, _, _}          -> erlang:error({reserved_meta_key, Key0});
    {false, true, _}      -> erlang:error({bad_codec, Value});
    {false, false, false} -> erlang:error({bad_meta_value, Value});
    {false, false, true}  -> ok
  end,
  validate_meta(Rest, [{Key, Value} | Checked]).

%% Meta keys which start with 'avro.' are reserved,
%% with 'avro.codec' being the one exception callers may set.
-spec is_reserved_meta_key(binary()) -> boolean().
is_reserved_meta_key(<<"avro.", Suffix/binary>>) -> Suffix =/= <<"codec">>;
is_reserved_meta_key(_)                          -> false.

%% If avro.codec meta is provided, it must be one of supported values.
-spec is_invalid_codec_meta(binary(), binary()) -> boolean().
is_invalid_codec_meta(<<"avro.codec">>, Codec) ->
  not lists:member(Codec, [<<"null">>, <<"deflate">>, <<"snappy">>]);
is_invalid_codec_meta(_Key, _Value) ->
  false.

%% A fresh random 16-byte sync marker.
-spec generate_sync_bytes() -> binary().
generate_sync_bytes() ->
  crypto:strong_rand_bytes(16).

-spec decode_stream(avro_type(), binary()) -> {avro:out(), binary()}.
decode_stream(Type, Bin) when is_binary(Bin) ->
  %% Used for the hand coded ocf header schema only, which references no
  %% named types, so any lookup attempt is a bug.
  Lkup = fun(_) -> erlang:error(unexpected) end,
  avro_binary_decoder:decode_stream(Bin, Type, Lkup).

%% Decode one value of the given type from the head of the binary and
%% return it together with the remaining bytes.
-spec decode_stream(lkup(), avro_type(), binary()) ->
        {avro:out(), binary()} | no_return().
decode_stream(Lkup, Type, Bin) when is_binary(Bin) ->
  avro_binary_decoder:decode_stream(Bin, Type, Lkup).

%% Same as decode_stream/3, with explicit decoder options.
-spec decode_stream(lkup(), avro_type(), binary(), decoder_options()) ->
        {avro:out(), binary()} | no_return().
decode_stream(Lkup, Type, Bin, Options) when is_binary(Bin) ->
  avro_binary_decoder:decode_stream(Bin, Type, Lkup, Options).

%% Decode all data blocks until the input is exhausted.
%% Each block is laid out as: object count (long), payload size in bytes
%% (long), payload, 16-byte sync marker.
%% Objects are accumulated in reverse and put back in file order at the end.
-spec decode_blocks(lkup(), avro_type(), avro_codec(),
                    binary(), binary(), [avro:out()], decoder_options()) ->
        [avro:out()].
decode_blocks(_Lkup, _Type, _Codec, _Sync, <<>>, Acc, _Options) ->
  lists:reverse(Acc);
decode_blocks(Lkup, Type, Codec, Sync, Bin0, Acc, Options) ->
  LongType = avro_primitive:long_type(),
  {Count, Bin1} = decode_stream(Lkup, LongType, Bin0),
  {Size, Bin} = decode_stream(Lkup, LongType, Bin1),
  %% Sync is already bound, so besides slicing out the payload this match
  %% asserts that the block is terminated by the sync marker of the header.
  <<Block:Size/binary, Sync:16/binary, Tail/binary>> = Bin,
  NewAcc = decode_block(Lkup, Type, Codec, Block, Count, Acc, Options),
  decode_blocks(Lkup, Type, Codec, Sync, Tail, NewAcc, Options).

-spec decode_block(lkup(), avro_type(), avro_codec(),
                   binary(), integer(), [avro:out()],
                   decoder_options()) -> [avro:out()].
%% Decode `Count' objects from one block payload, prepending them to Acc
%% (i.e. Acc stays in reverse order). Compressed payloads are first
%% decompressed and then handled by the `null' codec clause.
decode_block(_Lkup, _Type, _Codec, <<>>, 0, Acc, _Options) -> Acc;
decode_block(Lkup, Type, deflate, Bin, Count, Acc, Options) ->
  Decompressed = zlib:unzip(Bin),
  decode_block(Lkup, Type, null, Decompressed, Count, Acc, Options);
decode_block(Lkup, Type, snappy, Bin, Count, Acc, Options) ->
  %% A snappy block is the compressed data followed by a 4-byte CRC32
  %% of the uncompressed data.
  CompressedSize = byte_size(Bin) - 4,
  <<Compressed:CompressedSize/binary, Checksum:4/binary>> = Bin,
  {ok, Decompressed} = snappyer:decompress(Compressed),
  is_valid_checksum(Decompressed, Checksum)
    orelse erlang:error({invalid_checksum, Decompressed}),
  decode_block(Lkup, Type, null, Decompressed, Count, Acc, Options);
decode_block(Lkup, Type, null, Bin, Count, Acc, Options) ->
  {Obj, Tail} = decode_stream(Lkup, Type, Bin, Options),
  decode_block(Lkup, Type, null, Tail, Count - 1, [Obj | Acc], Options).

%% Check the 4-byte big-endian CRC32 trailer of a snappy block.
%% The expected value is built with a fixed 32-bit width on purpose:
%% binary:encode_unsigned/1 yields the shortest possible encoding, so a CRC
%% with leading zero byte(s) would never compare equal to the 4-byte
%% checksum and a perfectly valid block would be rejected.
-spec is_valid_checksum(binary(), binary()) -> boolean().
is_valid_checksum(Decompressed, Checksum) ->
  <<(erlang:crc32(Decompressed)):32>> =:= Checksum.

%% Hand coded schema.
%% {"type": "record", "name": "org.apache.avro.file.Header",
%%  "fields" : [
%%    {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}},
%%    {"name": "meta",  "type": {"type": "map", "values": "bytes"}},
%%    {"name": "sync",  "type": {"type": "fixed", "name": "Sync", "size": 16}}
%%   ]
%% }
-spec ocf_schema() -> avro_type().
ocf_schema() ->
  MagicType = avro_fixed:type("magic", 4),
  MetaType = avro_map:type(avro_primitive:bytes_type()),
  SyncType = avro_fixed:type("sync", 16),
  Fields = [ avro_record:define_field("magic", MagicType)
           , avro_record:define_field("meta", MetaType)
           , avro_record:define_field("sync", SyncType)
           ],
  avro_record:type("org.apache.avro.file.Header", Fields).

%% Get codec from meta fields
-spec get_codec([{binary(), binary()}]) -> avro_codec().
%% A missing `avro.codec' entry means `null' (no compression).
%% Any other codec value crashes with `case_clause'.
get_codec(Meta) ->
  case lists:keyfind(<<"avro.codec">>, 1, Meta) of
    false ->
      null;
    {_, <<"null">>} ->
      null;
    {_, <<"deflate">>} ->
      deflate;
    {_, <<"snappy">>} ->
      snappy
  end.

%% Encode block according to selected codec
%% null:    the data as is, flattened to one binary
%% deflate: the zlib:zip/1 output of the data
%% snappy:  the compressed data followed by the CRC32 of the uncompressed
%%          data as a fixed-width 32-bit big-endian integer
-spec encode_block([{binary(), binary()}], iolist()) -> binary().
encode_block(Meta, Data) ->
  case get_codec(Meta) of
    null ->
      iolist_to_binary(Data);
    deflate ->
      zlib:zip(Data);
    snappy ->
      Checksum = erlang:crc32(Data),
      {ok, Bin} = snappyer:compress(Data),
      iolist_to_binary([Bin, <<Checksum:32>>])
  end.

%%%_* Emacs ====================================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 2
%%% End: