├── .credo.exs ├── .formatter.exs ├── .github ├── CODEOWNERS └── workflows │ ├── ci.yml │ ├── common-config.yaml │ └── deploy.yml ├── .gitignore ├── .iex.exs ├── CHANGELOG.md ├── README.md ├── lib ├── avro_ex.ex └── avro_ex │ ├── decode.ex │ ├── decode_error.ex │ ├── encode.ex │ ├── encode_error.ex │ ├── schema.ex │ └── schema │ ├── array.ex │ ├── context.ex │ ├── encoder.ex │ ├── enum.ex │ ├── fixed.ex │ ├── map.ex │ ├── parser.ex │ ├── primitive.ex │ ├── record.ex │ ├── record │ └── field.ex │ ├── reference.ex │ ├── schema_decode_error.ex │ └── union.ex ├── mix.exs ├── mix.lock └── test ├── avro_ex_test.exs ├── decode_test.exs ├── encode_test.exs ├── fixtures ├── decimal.avro └── decimal.avsc ├── property_test.exs ├── schema_encoder_test.exs ├── schema_parser_test.exs ├── schema_test.exs ├── support ├── encode_macros.ex └── schema_macros.ex └── test_helper.exs /.credo.exs: -------------------------------------------------------------------------------- 1 | # This file contains the configuration for Credo and you are probably reading 2 | # this after creating it with `mix credo.gen.config`. 3 | # 4 | # If you find anything wrong or unclear in this file, please report an 5 | # issue on GitHub: https://github.com/rrrene/credo/issues 6 | # 7 | %{ 8 | # 9 | # You can have as many configs as you like in the `configs:` field. 10 | configs: [ 11 | %{ 12 | # 13 | # Run any config using `mix credo -C `. If no config name is given 14 | # "default" is used. 15 | # 16 | name: "default", 17 | # 18 | # These are the files included in the analysis: 19 | files: %{ 20 | # 21 | # You can give explicit globs or simply directories. 22 | # In the latter case `**/*.{ex,exs}` will be used. 
23 | # 24 | included: [ 25 | "lib/", 26 | "src/", 27 | "test/", 28 | "web/", 29 | "apps/*/lib/", 30 | "apps/*/src/", 31 | "apps/*/test/", 32 | "apps/*/web/" 33 | ], 34 | excluded: [~r"/_build/", ~r"/deps/", ~r"/node_modules/"] 35 | }, 36 | # 37 | # Load and configure plugins here: 38 | # 39 | plugins: [], 40 | # 41 | # If you create your own checks, you must specify the source files for 42 | # them here, so they can be loaded by Credo before running the analysis. 43 | # 44 | requires: [], 45 | # 46 | # If you want to enforce a style guide and need a more traditional linting 47 | # experience, you can change `strict` to `true` below: 48 | # 49 | strict: false, 50 | # 51 | # To modify the timeout for parsing files, change this value: 52 | # 53 | parse_timeout: 5000, 54 | # 55 | # If you want to use uncolored output by default, you can change `color` 56 | # to `false` below: 57 | # 58 | color: true, 59 | # 60 | # You can customize the parameters of any check by adding a second element 61 | # to the tuple. 62 | # 63 | # To disable a check put `false` as second element: 64 | # 65 | # {Credo.Check.Design.DuplicatedCode, false} 66 | # 67 | checks: [ 68 | # 69 | ## Consistency Checks 70 | # 71 | {Credo.Check.Consistency.ExceptionNames, []}, 72 | {Credo.Check.Consistency.LineEndings, []}, 73 | {Credo.Check.Consistency.ParameterPatternMatching, []}, 74 | {Credo.Check.Consistency.SpaceAroundOperators, []}, 75 | {Credo.Check.Consistency.SpaceInParentheses, []}, 76 | {Credo.Check.Consistency.TabsOrSpaces, []}, 77 | 78 | # 79 | ## Design Checks 80 | # 81 | # You can customize the priority of any check 82 | # Priority values are: `low, normal, high, higher` 83 | # 84 | {Credo.Check.Design.AliasUsage, [priority: :low, if_nested_deeper_than: 2, if_called_more_often_than: 0]}, 85 | # You can also customize the exit_status of each check. 86 | # If you don't want TODO comments to cause `mix credo` to fail, just 87 | # set this value to 0 (zero). 
88 | # 89 | {Credo.Check.Design.TagTODO, [exit_status: 2]}, 90 | {Credo.Check.Design.TagFIXME, []}, 91 | 92 | # 93 | ## Readability Checks 94 | # 95 | {Credo.Check.Readability.AliasOrder, []}, 96 | {Credo.Check.Readability.FunctionNames, []}, 97 | {Credo.Check.Readability.LargeNumbers, []}, 98 | {Credo.Check.Readability.MaxLineLength, [priority: :low, max_length: 120]}, 99 | {Credo.Check.Readability.ModuleAttributeNames, []}, 100 | {Credo.Check.Readability.ModuleDoc, false}, 101 | {Credo.Check.Readability.ModuleNames, []}, 102 | {Credo.Check.Readability.ParenthesesInCondition, []}, 103 | {Credo.Check.Readability.ParenthesesOnZeroArityDefs, []}, 104 | {Credo.Check.Readability.PredicateFunctionNames, []}, 105 | {Credo.Check.Readability.PreferImplicitTry, []}, 106 | {Credo.Check.Readability.RedundantBlankLines, []}, 107 | {Credo.Check.Readability.Semicolons, []}, 108 | {Credo.Check.Readability.SpaceAfterCommas, []}, 109 | {Credo.Check.Readability.StringSigils, []}, 110 | {Credo.Check.Readability.TrailingBlankLine, []}, 111 | {Credo.Check.Readability.TrailingWhiteSpace, []}, 112 | {Credo.Check.Readability.UnnecessaryAliasExpansion, []}, 113 | {Credo.Check.Readability.VariableNames, []}, 114 | 115 | # 116 | ## Refactoring Opportunities 117 | # 118 | {Credo.Check.Refactor.CondStatements, []}, 119 | {Credo.Check.Refactor.CyclomaticComplexity, []}, 120 | {Credo.Check.Refactor.FunctionArity, []}, 121 | {Credo.Check.Refactor.LongQuoteBlocks, []}, 122 | {Credo.Check.Refactor.MapInto, []}, 123 | {Credo.Check.Refactor.MatchInCondition, []}, 124 | {Credo.Check.Refactor.NegatedConditionsInUnless, []}, 125 | {Credo.Check.Refactor.NegatedConditionsWithElse, []}, 126 | {Credo.Check.Refactor.Nesting, max_nesting: 3}, 127 | {Credo.Check.Refactor.UnlessWithElse, []}, 128 | {Credo.Check.Refactor.WithClauses, []}, 129 | 130 | # 131 | ## Warnings 132 | # 133 | {Credo.Check.Warning.BoolOperationOnSameValues, []}, 134 | {Credo.Check.Warning.ExpensiveEmptyEnumCheck, []}, 135 | 
{Credo.Check.Warning.IExPry, []}, 136 | {Credo.Check.Warning.IoInspect, []}, 137 | {Credo.Check.Warning.LazyLogging, []}, 138 | {Credo.Check.Warning.MixEnv, false}, 139 | {Credo.Check.Warning.OperationOnSameValues, []}, 140 | {Credo.Check.Warning.OperationWithConstantResult, []}, 141 | {Credo.Check.Warning.RaiseInsideRescue, []}, 142 | {Credo.Check.Warning.UnusedEnumOperation, []}, 143 | {Credo.Check.Warning.UnusedFileOperation, []}, 144 | {Credo.Check.Warning.UnusedKeywordOperation, []}, 145 | {Credo.Check.Warning.UnusedListOperation, []}, 146 | {Credo.Check.Warning.UnusedPathOperation, []}, 147 | {Credo.Check.Warning.UnusedRegexOperation, []}, 148 | {Credo.Check.Warning.UnusedStringOperation, []}, 149 | {Credo.Check.Warning.UnusedTupleOperation, []}, 150 | {Credo.Check.Warning.UnsafeExec, []}, 151 | 152 | # 153 | # Checks scheduled for next check update (opt-in for now, just replace `false` with `[]`) 154 | 155 | # 156 | # Controversial and experimental checks (opt-in, just replace `false` with `[]`) 157 | # 158 | {Credo.Check.Readability.StrictModuleLayout, []}, 159 | {Credo.Check.Consistency.MultiAliasImportRequireUse, []}, 160 | {Credo.Check.Consistency.UnusedVariableNames, false}, 161 | {Credo.Check.Design.DuplicatedCode, []}, 162 | {Credo.Check.Readability.AliasAs, false}, 163 | {Credo.Check.Readability.MultiAlias, []}, 164 | {Credo.Check.Readability.Specs, []}, 165 | {Credo.Check.Readability.SinglePipe, []}, 166 | {Credo.Check.Readability.WithCustomTaggedTuple, false}, 167 | {Credo.Check.Refactor.ABCSize, false}, 168 | {Credo.Check.Refactor.AppendSingleItem, []}, 169 | {Credo.Check.Refactor.DoubleBooleanNegation, []}, 170 | {Credo.Check.Refactor.ModuleDependencies, false}, 171 | {Credo.Check.Refactor.NegatedIsNil, []}, 172 | {Credo.Check.Refactor.PipeChainStart, []}, 173 | {Credo.Check.Refactor.VariableRebinding, []}, 174 | {Credo.Check.Warning.MapGetUnsafePass, false}, 175 | {Credo.Check.Warning.UnsafeToAtom, false}, 176 | 
{Credo.Check.Warning.LeakyEnvironment, []} 177 | 178 | # 179 | # Custom checks can be created using `mix credo.gen.check`. 180 | # 181 | ] 182 | } 183 | ] 184 | } 185 | -------------------------------------------------------------------------------- /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["mix.exs", "{config,lib,test}/**/*.{ex,exs}"], 4 | import_deps: [:typed_struct, :stream_data], 5 | line_length: 120 6 | ] 7 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Order alphabetically. 2 | # Order is important. The last matching pattern takes the most precedence. 3 | 4 | # Default owners for everything in the repo. 5 | * @beam-community/team @beam-community/avro 6 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | 3 | on: 4 | pull_request: 5 | types: [opened, reopened, synchronize] 6 | push: 7 | branches: 8 | - "main" 9 | jobs: 10 | test: 11 | name: Build and test 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up Elixir 17 | uses: erlef/setup-beam@v1.15 18 | with: 19 | elixir-version: "1.13.3" # Define the elixir version [required] 20 | otp-version: "24.2.1" # Define the OTP version [required] 21 | - name: Restore dependencies cache 22 | uses: actions/cache@v2 23 | with: 24 | path: deps 25 | key: ${{ runner.os }}-mix-${{ hashFiles('**/mix.lock') }} 26 | restore-keys: ${{ runner.os }}-mix- 27 | - name: Install dependencies 28 | run: mix deps.get 29 | - name: Run tests 30 | run: mix test 31 | - name: Check formatting 32 | run: mix format --check-formatted 33 | - name: Credo 34 | run: mix credo 35 | 36 | 
dialyzer: 37 | name: Run Dialyzer for type checking 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v2 41 | - name: Set mix file hash 42 | id: set_vars 43 | run: | 44 | mix_hash="${{ hashFiles(format('{0}{1}', github.workspace, '/mix.lock')) }}" 45 | echo "::set-output name=mix_hash::$mix_hash" 46 | - name: Cache PLT files 47 | id: cache-plt 48 | uses: actions/cache@v2 49 | with: 50 | path: | 51 | _build/dev/*.plt 52 | _build/dev/*.plt.hash 53 | key: plt-cache-${{ steps.set_vars.outputs.mix_hash }} 54 | restore-keys: | 55 | plt-cache- 56 | - name: Run Dialyzer 57 | uses: erlef/setup-beam@v1.15 58 | with: 59 | elixir-version: "1.13.3" # Define the elixir version [required] 60 | otp-version: "24.2.1" # Define the OTP version [required] 61 | - run: mix deps.get 62 | - run: mix dialyzer 63 | -------------------------------------------------------------------------------- /.github/workflows/common-config.yaml: -------------------------------------------------------------------------------- 1 | # This file is synced with beam-community/common-config. Any changes will be overwritten. 
2 | 3 | name: Common Config 4 | 5 | on: 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | - .github/workflows/common-config.yaml 11 | repository_dispatch: 12 | types: 13 | - common-config 14 | schedule: 15 | - cron: "8 12 8 * *" 16 | workflow_dispatch: {} 17 | 18 | concurrency: 19 | group: Common Config 20 | 21 | jobs: 22 | Sync: 23 | runs-on: ubuntu-latest 24 | 25 | steps: 26 | - name: Checkout 27 | uses: actions/checkout@v4 28 | with: 29 | token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} 30 | persist-credentials: true 31 | 32 | - name: Setup Node 33 | uses: actions/setup-node@v4 34 | with: 35 | node-version: 20 36 | 37 | - name: Setup Elixir 38 | uses: stordco/actions-elixir/setup@v1 39 | with: 40 | github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} 41 | elixir-version: "1.15" 42 | otp-version: "26.0" 43 | 44 | - name: Sync 45 | uses: stordco/actions-sync@v1 46 | with: 47 | commit-message: "chore: sync files with beam-community/common-config" 48 | pr-enabled: true 49 | pr-labels: common-config 50 | pr-title: "chore: sync files with beam-community/common-config" 51 | pr-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} 52 | sync-auth: doomspork:${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} 53 | sync-branch: latest 54 | sync-repository: github.com/beam-community/common-config.git 55 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Hex.pm 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | Publish: 9 | runs-on: ubuntu-latest 10 | env: 11 | HEX_API_KEY: ${{ secrets.HEXPM_SECRET }} 12 | steps: 13 | - uses: actions/checkout@v2 14 | - uses: erlef/setup-beam@v1.15 15 | with: 16 | elixir-version: '1.13.3' 17 | otp-version: '24.2.1' 18 | - run: mix deps.get 19 | - run: mix compile --docs 20 | - run: mix hex.publish --yes 21 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps 9 | 10 | # Where 3rd-party dependencies like ExDoc output generated docs. 11 | /doc 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | .elixir_ls 23 | -------------------------------------------------------------------------------- /.iex.exs: -------------------------------------------------------------------------------- 1 | alias AvroEx.Schema 2 | alias AvroEx.Schema.{Context, Primitive} 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v2.2.0 - July 31st, 2024 4 | 5 | ### Added 6 | 7 | - Support for encoding and decoding Decimals 8 | 9 | ### Fixed 10 | 11 | - Incorrect error for decimal encoding 12 | - String.slice deprecation warning 13 | 14 | ## v2.1.0 - March 28th, 2023 15 | 16 | ### Added 17 | 18 | - Support for tagged unions in named fields 19 | - `AvroEx.encode/3` supports a `:include_block_byte_size` option for encoding block size 20 | 21 | ### Fixed 22 | 23 | - Allow empty strings as namespaces 24 | - Fix `encodeable?` checks for int and float 25 | - Set time values to proper precision on decoding 26 | - Fixed reference type name generation 27 | 28 | ## v2.0.1 - April 3rd, 2022 29 | 30 | ### Fixed 31 | 32 | - Fixed bug where Array and Map children of Unions would fail to 
parse 33 | 34 | ## v2.0.0 - March 8th, 2022 35 | 36 | ### Changed 37 | 38 | - `AvroEx.encode/2` now returns `{:error, AvroEx.EncodeError.t()}` in the case of an error 39 | - Primitive integer types now represented as `%Primitive{type: :int}` instead of `%Primitive{type: :integer}` 40 | - Primitive null types now represented as `%Primitive{type: :null}` instead of `%Primitive{type: nil}` 41 | - Schema decoding now supports directly passing Elixir terms, will strictly validate the schema, and produce helpful error messages 42 | - Removed `Ecto` as a dependency 43 | - `AvroEx.Schema.full_name/2` - reverses the order of the arguments, accepting a Schema type or name, followed by the namespace 44 | 45 | ### Added 46 | 47 | - `AvroEx.encode!/2` - identical to `encode/2`, but returns raw value. Raises on error 48 | - `AvroEx.decode!/2` - identical to `decode/2`, but returns raw value. Raises on error 49 | - `AvroEx.decode_schema/1` and `AvroEx.decode_schema!/1` in place of `AvroEx.parse_schema/1` 50 | - Support for encoding and decoding `date` logical types to and from `Date.t()` 51 | - Schema decoding adds a `:strict` option that will strictly validate the schema for unrecognized fields 52 | - `AvroEx.encode_schema/2` - encode an `AvroEx.Schema.t()` back to JSON. 
Supports encoding the schema to [Parsing Canonical Form](https://avro.apache.org/docs/current/spec.html#Parsing+Canonical+Form+for+Schemas) 53 | - `AvroEx.Schema.namespace/2` - Returns the namespace of the given Schema type 54 | 55 | ### Deprecated 56 | 57 | - `AvroEx.parse_schema/1` 58 | - `AvroEx.parse_schema!/1` 59 | - `AvroEx.named_type!/2` 60 | 61 | ## v1.2.0 - February 20th, 2022 62 | 63 | ### Fixed 64 | 65 | - Fix exception when encoding bad Record data 66 | - Address dialyzer issues 67 | - Add type for AvroEx.Schema.Record.Field to fix compilation error 68 | - Fix long encoding 69 | - Fix variable integer and long decoding 70 | 71 | ### Added 72 | 73 | - Support encoding DateTime and Time to logical types in Union 74 | 75 | ### Changed 76 | 77 | - Records can accept atoms for encoding keys 78 | - String values can accept atoms for encoding 79 | - Enums can accept atoms for encoding 80 | - Simplify integer and long encoding 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AvroEx 2 | 3 | An [Avro](https://avro.apache.org/) encoding/decoding library written in pure Elixir. 4 | 5 | ## Documentation 6 | 7 | The docs can be found on [hex.pm](https://hexdocs.pm/avro_ex/AvroEx.html) 8 | 9 | ## Installation 10 | 11 | ```elixir 12 | def deps do 13 | [{:avro_ex, "~> 2.0"}] 14 | end 15 | ``` 16 | 17 | ## Usage 18 | 19 | ### Schema Decoding 20 | 21 | Avro uses schemas to define the shape and contract for data. The schemas that your 22 | application uses may be defined locally, or may come from a [Schema Registry](https://docs.confluent.io/platform/current/schema-registry/index.html). 
23 | 24 | In either case, the first step is to decode a schema defined as JSON or Elixir terms into a `t:AvroEx.Schema.t/0` 25 | 26 | ```elixir 27 | iex> AvroEx.decode_schema!(["int", "string"]) 28 | %AvroEx.Schema{ 29 | context: %AvroEx.Schema.Context{names: %{}}, 30 | schema: %AvroEx.Schema.Union{ 31 | possibilities: [ 32 | %AvroEx.Schema.Primitive{metadata: %{}, type: :int}, 33 | %AvroEx.Schema.Primitive{metadata: %{}, type: :string} 34 | ] 35 | } 36 | } 37 | ``` 38 | 39 | `AvroEx` will automatically detect Elixir terms or JSON, so you can decode JSON schemas directly 40 | 41 | ``` elixir 42 | iex> AvroEx.decode_schema!("[\"int\",\"string\"]") 43 | %AvroEx.Schema{ 44 | context: %AvroEx.Schema.Context{names: %{}}, 45 | schema: %AvroEx.Schema.Union{ 46 | possibilities: [ 47 | %AvroEx.Schema.Primitive{metadata: %{}, type: :int}, 48 | %AvroEx.Schema.Primitive{metadata: %{}, type: :string} 49 | ] 50 | } 51 | } 52 | ``` 53 | 54 | #### Strict Schema Decoding 55 | 56 | When writing an Avro schema, it is helpful to get feedback on unrecognized fields. For this purpose, 57 | it is recommended to use the `:strict` option to provide additional checks. Note that it is not 58 | recommended to use this option in production when pulling externally defined schemas, as they may 59 | have published a schema with looser validations. 60 | 61 | ``` elixir 62 | iex> AvroEx.decode_schema!(%{"type" => "map", "values" => "int", "bogus" => "value"}, strict: true) 63 | ** (AvroEx.Schema.DecodeError) Unrecognized schema key `bogus` for AvroEx.Schema.Map in %{"bogus" => "value", "type" => "map", "values" => "int"} 64 | (avro_ex 1.2.0) lib/avro_ex/schema/parser.ex:43: AvroEx.Schema.Parser.parse!/2 65 | ``` 66 | 67 | 68 | ## Encoding 69 | 70 | When publishing Avro data, it first must be encoded using the schema. 
71 | 72 | ```elixir 73 | iex> schema = AvroEx.decode_schema!(%{ 74 | "type" => "record", 75 | "name" => "MyRecord", 76 | "fields" => [ 77 | %{"name" => "a", "type" => "int"}, 78 | %{"name" => "b", "type" => "string"} 79 | ] 80 | }) 81 | iex> AvroEx.encode!(schema, %{a: 1, b: "two"}) 82 | <<2, 6, 116, 119, 111>> 83 | ``` 84 | 85 | ## Decoding 86 | 87 | When receiving Avro data, decode it using the schema 88 | 89 | ``` elixir 90 | iex> AvroEx.decode!(schema, <<2, 6, 116, 119, 111>>) 91 | %{"a" => 1, "b" => "two"} 92 | ``` 93 | 94 | ## Schema Encoding 95 | 96 | `AvroEx` also supports encoding schemas back to JSON. This may be needed when registering schemas or 97 | serializing them to disk. 98 | 99 | ``` elixir 100 | iex> AvroEx.encode_schema(schema) 101 | "{\"fields\":[{\"name\":\"a\",\"type\":{\"type\":\"int\"}},{\"name\":\"b\",\"type\":{\"type\":\"string\"}}],\"name\":\"MyRecord\",\"type\":\"record\"}" 102 | ``` 103 | 104 | Additionally, schemas can be encoded to [Parsing Canonical Form](https://avro.apache.org/docs/current/spec.html#Parsing+Canonical+Form+for+Schemas) using 105 | the `:canonical` option. 106 | 107 | ``` elixir 108 | iex> AvroEx.encode_schema(schema, canonical: true) 109 | "{\"name\":\"MyRecord\",\"type\":\"record\",\"fields\":[{\"name\":\"a\",\"type\":\"int\"},{\"name\":\"b\",\"type\":\"string\"}]}" 110 | ``` 111 | 112 | ### Testing 113 | 114 | For testing convenience, `AvroEx.encodable?/2` is exported to check if data can be 115 | encoded against the given schema. Note that in production scenarios, it is not 116 | recommended to use this function. 117 | 118 | ```elixir 119 | defmodule MyModule.Test do 120 | use ExUnit.Case 121 | 122 | setup do 123 | data = ... 124 | schema = ... 
125 | {:ok, %{data: data, schema: schema}} 126 | end 127 | 128 | describe "my_function/1" do 129 | test "builds a structure that can be encoded with our avro schema", context do 130 | result = MyModule.my_function(context.data) 131 | 132 | assert AvroEx.encodable?(context.schema, result) 133 | end 134 | end 135 | end 136 | ``` 137 | -------------------------------------------------------------------------------- /lib/avro_ex.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx do 2 | @moduledoc """ 3 | AvroEx is a library for encoding and decoding data with Avro schemas. 4 | Supports parsing schemas, encoding data, and decoding data. 5 | 6 | For encoding and decoding, the following type chart should be referenced: 7 | 8 | | Avro Types | Elixir Types | 9 | |------------|:------------:| 10 | | boolean | boolean | 11 | | integer | integer | 12 | | long | integer | 13 | | float | decimal | 14 | | double | decimal | 15 | | bytes | binary | 16 | | string | String.t, atom | 17 | | null | nil | 18 | | Record | map | 19 | | Enum | String.t, atom (corresponding to the enum's symbol list) | 20 | """ 21 | alias AvroEx.Schema 22 | alias AvroEx.Schema.Context 23 | 24 | @type encoded_avro :: binary 25 | 26 | @doc """ 27 | Checks to see if the given data is encodable using the given schema. Helpful for unit testing. 
28 | 29 | iex> AvroEx.encodable?(%Schema{schema: %Primitive{type: :string}}, "wut") 30 | true 31 | 32 | iex> AvroEx.encodable?(%Schema{schema: %Primitive{type: :string}}, 12345) 33 | false 34 | """ 35 | @spec encodable?(AvroEx.Schema.t(), any) :: boolean 36 | defdelegate encodable?(schema, data), to: AvroEx.Schema 37 | 38 | @spec parse_schema(Schema.json_schema()) :: {:ok, Schema.t()} | {:error, AvroEx.Schema.DecodeError.t()} 39 | @deprecated "Use AvroEx.decode_schema/1 instead" 40 | def parse_schema(json), do: decode_schema(json, []) 41 | 42 | @spec parse_schema!(Schema.json_schema()) :: Schema.t() | no_return 43 | @deprecated "Use AvroEx.decode_schema!/1 instead" 44 | def parse_schema!(json), do: decode_schema!(json, []) 45 | 46 | @doc """ 47 | Given an Elixir or JSON-encoded schema, parses the schema and returns a `t:AvroEx.Schema.t/0` struct representing the schema. 48 | 49 | Errors for invalid JSON, invalid schemas, and bad name references. 50 | 51 | ## Options 52 | * `:strict` - whether to strictly validate the schema, defaults to `false`. Recommended to turn this on for locally owned schemas, but not for interop with external schemas. 53 | 54 | ## Examples 55 | 56 | iex> AvroEx.decode_schema("string") 57 | {:ok, %AvroEx.Schema{schema: %AvroEx.Schema.Primitive{type: :string}}} 58 | 59 | iex> json= ~S({\"fields\":[{\"name\":\"a\",\"type\":\"string\"}],\"name\":\"my_type\",\"type\":\"record\"}) 60 | iex> {:ok, %Schema{schema: record}} = AvroEx.decode_schema(json) 61 | iex> match?(%Record{}, record) 62 | true 63 | """ 64 | @spec decode_schema(term(), Keyword.t()) :: {:ok, Schema.t()} | {:error, AvroEx.Schema.DecodeError.t()} 65 | def decode_schema(schema, opts \\ []) do 66 | try do 67 | {:ok, decode_schema!(schema, opts)} 68 | rescue 69 | error -> {:error, error} 70 | end 71 | end 72 | 73 | @doc """ 74 | Same as `AvroEx.decode_schema/1`, but raises an exception on failure instead of 75 | returning an error tuple. 
76 | 77 | ## Examples 78 | 79 | iex> AvroEx.decode_schema!("int") 80 | %AvroEx.Schema{schema: %AvroEx.Schema.Primitive{type: :int}} 81 | 82 | """ 83 | @spec decode_schema!(term(), Keyword.t()) :: Schema.t() 84 | def decode_schema!(schema, opts \\ []) do 85 | if is_binary(schema) and not Schema.Parser.primitive?(schema) do 86 | schema 87 | |> Jason.decode!() 88 | |> Schema.Parser.parse!(opts) 89 | else 90 | Schema.Parser.parse!(schema, opts) 91 | end 92 | end 93 | 94 | @doc """ 95 | Encodes the given schema to JSON 96 | 97 | ## Options 98 | * `canonical` - Encodes the schema into its [Parsing Canonical Form](https://avro.apache.org/docs/current/spec.html#Parsing+Canonical+Form+for+Schemas), default `false` 99 | 100 | ## Examples 101 | 102 | iex> schema = AvroEx.decode_schema!(%{"type" => "int", "logicalType" => "date"}) 103 | iex> AvroEx.encode_schema(schema) 104 | ~S({"type":"int","logicalType":"date"}) 105 | 106 | iex> schema = AvroEx.decode_schema!(%{"type" => "int", "logicalType" => "date"}) 107 | iex> AvroEx.encode_schema(schema, canonical: true) 108 | ~S("int") 109 | 110 | """ 111 | @spec encode_schema(Schema.t(), Keyword.t()) :: String.t() 112 | def encode_schema(%Schema{} = schema, opts \\ []) do 113 | AvroEx.Schema.Encoder.encode(schema, opts) 114 | end 115 | 116 | @doc """ 117 | Given `t:AvroEx.Schema.t/0` and `term()`, takes the data and encodes it according to the schema. 118 | 119 | ## Examples 120 | 121 | iex> schema = AvroEx.decode_schema!("int") 122 | iex> AvroEx.encode(schema, 1234) 123 | {:ok, <<164, 19>>} 124 | 125 | ## Tagged unions 126 | 127 | When supplying a union value one can optionally supply a tagged tuple of `{name, value}` 128 | instead of just the plain `value` to force encoding the value as the named type 129 | found using `name` instead of matching by the shape of `value`. This can improve 130 | performance and allows forcing a selected named type even if the shape of the 131 | data is the same. 
See also the "Tagged unions" section on `decode/3`. 132 | 133 | ## Array encoding 134 | 135 | Array encoding may add an additional `long` encoded integer to put the byte size 136 | of blocks with their counts. This allows consumers of the encoded data to skip 137 | over those blocks in an efficient manner. Using the option `include_block_byte_size: true` 138 | enables adding those additional values. 139 | """ 140 | @spec encode(Schema.t(), term, keyword()) :: 141 | {:ok, encoded_avro} | {:error, AvroEx.EncodeError.t() | Exception.t()} 142 | def encode(schema, data, opts \\ []) do 143 | AvroEx.Encode.encode(schema, data, opts) 144 | end 145 | 146 | @doc """ 147 | Same as `encode/2`, but returns the encoded value directly. 148 | 149 | Raises `t:AvroEx.EncodeError.t/0` on error. 150 | 151 | For documentation of `opts` see `encode/3`. 152 | 153 | ## Examples 154 | 155 | iex> schema = AvroEx.decode_schema!("boolean") 156 | iex> AvroEx.encode!(schema, true) 157 | <<1>> 158 | """ 159 | @spec encode!(Schema.t(), term(), keyword()) :: encoded_avro() 160 | def encode!(schema, data, opts \\ []) do 161 | case AvroEx.Encode.encode(schema, data, opts) do 162 | {:ok, data} -> data 163 | {:error, error} -> raise error 164 | end 165 | end 166 | 167 | @doc """ 168 | Given an encoded message and its accompanying schema, decodes the message. 169 | 170 | iex> schema = AvroEx.decode_schema!("boolean") 171 | iex> AvroEx.decode(schema, <<1>>) 172 | {:ok, true} 173 | 174 | ## Tagged unions 175 | 176 | When decoding one can set the option `tagged_unions: true` to decode union 177 | values as a tagged tuple of `{name, value}` instead of just the plain `value`. 178 | This allows retaining the information about which union schema was used for 179 | encoding when this cannot be inferred from the `value` alone. 180 | 181 | ## Decimals 182 | 183 | Specify the option `decimals: :exact` to use `Decimal.new/3` to parse decimals 184 | into a Decimal struct with arbitrary precision. 
185 | 186 | Otherwise, an approximate number is calculated. 187 | 188 | """ 189 | @spec decode(Schema.t(), encoded_avro, keyword()) :: 190 | {:ok, term} 191 | | {:error, AvroEx.DecodeError.t()} 192 | def decode(schema, message, opts \\ []) do 193 | case AvroEx.Decode.decode(schema, message, opts) do 194 | {:ok, value, _} -> {:ok, value} 195 | {:error, error} -> {:error, error} 196 | end 197 | end 198 | 199 | @doc """ 200 | Same as decode/2, but returns raw decoded value. 201 | 202 | Raises `t:AvroEx.DecodeError.t/0` on error. 203 | 204 | For documentation of `opts` see `decode/3`. 205 | 206 | ## Examples 207 | 208 | iex> schema = AvroEx.decode_schema!("string") 209 | iex> encoded = AvroEx.encode!(schema, "hello") 210 | iex> AvroEx.decode!(schema, encoded) 211 | "hello" 212 | """ 213 | @spec decode!(Schema.t(), encoded_avro(), keyword()) :: term() 214 | def decode!(schema, message, opts \\ []) do 215 | case AvroEx.Decode.decode(schema, message, opts) do 216 | {:ok, value, _} -> value 217 | {:error, error} -> raise error 218 | end 219 | end 220 | 221 | @deprecated "Use AvroEx.Schema.Context.lookup/2" 222 | @spec named_type(Schema.full_name(), Schema.t() | Context.t()) :: nil | Schema.schema_types() 223 | def named_type(name, %Schema{context: %Context{} = context}) when is_binary(name) do 224 | named_type(name, context) 225 | end 226 | 227 | def named_type(name, %Context{} = context) do 228 | Context.lookup(context, name) 229 | end 230 | end 231 | -------------------------------------------------------------------------------- /lib/avro_ex/decode.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Decode do 2 | @moduledoc false 3 | 4 | require Bitwise 5 | alias AvroEx.{DecodeError} 6 | alias AvroEx.Schema 7 | alias AvroEx.Schema.{Array, Context, Fixed, Primitive, Record, Reference, Union} 8 | alias AvroEx.Schema.Record.Field 9 | 10 | @type reason :: term 11 | 12 | @seconds_in_day 24 * 60 * 60 13 | 14 | @doc false 
15 | @spec decode(AvroEx.Schema.t(), binary(), keyword()) :: {:ok, any(), binary()} | {:error, AvroEx.DecodeError.t()} 16 | def decode(%Schema{schema: schema, context: context}, avro_message, opts \\ []) 17 | when is_binary(avro_message) do 18 | try do 19 | {value, rest} = do_decode(schema, context, avro_message, opts) 20 | {:ok, value, rest} 21 | catch 22 | :throw, %DecodeError{} = e -> {:error, e} 23 | end 24 | end 25 | 26 | defp do_decode(%Reference{type: name}, %Context{} = context, data, opts) do 27 | do_decode(Context.lookup(context, name), context, data, opts) 28 | end 29 | 30 | defp do_decode(%Primitive{type: :null}, %Context{}, data, _) when is_binary(data) do 31 | {nil, data} 32 | end 33 | 34 | defp do_decode(%Primitive{type: :boolean}, %Context{}, <<0::8, rest::binary>>, _) do 35 | {false, rest} 36 | end 37 | 38 | defp do_decode(%Primitive{type: :boolean}, %Context{}, <<1::8, rest::binary>>, _) do 39 | {true, rest} 40 | end 41 | 42 | defp do_decode(%Primitive{type: :int, metadata: %{"logicalType" => "date"}}, %Context{}, data, _) do 43 | {val, rest} = variable_integer_decode(data, 0, 0, 32) 44 | 45 | {:ok, datetime} = DateTime.from_unix(@seconds_in_day * zigzag_decode(val)) 46 | date = DateTime.to_date(datetime) 47 | 48 | {date, rest} 49 | end 50 | 51 | defp do_decode( 52 | %Primitive{type: :int, metadata: %{"logicalType" => "time-millis"}}, 53 | %Context{}, 54 | data, 55 | _ 56 | ) 57 | when is_binary(data) do 58 | {val, rest} = variable_integer_decode(data, 0, 0, 32) 59 | milliseconds = zigzag_decode(val) 60 | 61 | # Create new time with :millisecond precision 62 | # In 1.14.0 – 1.14.2 `Time.add` wouldn't increase the precision as needed 63 | # https://github.com/elixir-lang/elixir/issues/12303 64 | {:ok, midnight} = Time.new(0, 0, 0, {0, 3}) 65 | time = Time.add(midnight, milliseconds, :millisecond) 66 | 67 | {time, rest} 68 | end 69 | 70 | defp do_decode(%Primitive{type: :int}, %Context{}, data, _) when is_binary(data) do 71 | {val, rest} = 
variable_integer_decode(data, 0, 0, 32) 72 | {zigzag_decode(val), rest} 73 | end 74 | 75 | defp do_decode( 76 | %Primitive{type: :long, metadata: %{"logicalType" => "time-micros"}}, 77 | %Context{}, 78 | data, 79 | _ 80 | ) 81 | when is_binary(data) do 82 | {val, rest} = variable_integer_decode(data, 0, 0, 64) 83 | microseconds = zigzag_decode(val) 84 | 85 | # Create new time with :microsecond precision 86 | # In 1.14.0 – 1.14.2 `Time.add` wouldn't increase the precision as needed 87 | # https://github.com/elixir-lang/elixir/issues/12303 88 | {:ok, midnight} = Time.new(0, 0, 0, {0, 6}) 89 | time = Time.add(midnight, microseconds, :microsecond) 90 | 91 | {time, rest} 92 | end 93 | 94 | defp do_decode( 95 | %Primitive{type: :long, metadata: %{"logicalType" => "timestamp-nanos"}}, 96 | %Context{}, 97 | data, 98 | _ 99 | ) 100 | when is_binary(data) do 101 | {val, rest} = variable_integer_decode(data, 0, 0, 64) 102 | nanoseconds = zigzag_decode(val) 103 | {:ok, date_time} = DateTime.from_unix(nanoseconds, :nanosecond) 104 | {date_time, rest} 105 | end 106 | 107 | defp do_decode( 108 | %Primitive{type: :long, metadata: %{"logicalType" => "timestamp-micros"}}, 109 | %Context{}, 110 | data, 111 | _ 112 | ) 113 | when is_binary(data) do 114 | {val, rest} = variable_integer_decode(data, 0, 0, 64) 115 | microseconds = zigzag_decode(val) 116 | {:ok, date_time} = DateTime.from_unix(microseconds, :microsecond) 117 | {date_time, rest} 118 | end 119 | 120 | defp do_decode( 121 | %Primitive{type: :long, metadata: %{"logicalType" => "timestamp-millis"}}, 122 | %Context{}, 123 | data, 124 | _ 125 | ) 126 | when is_binary(data) do 127 | {val, rest} = variable_integer_decode(data, 0, 0, 64) 128 | milliseconds = zigzag_decode(val) 129 | {:ok, date_time} = DateTime.from_unix(milliseconds, :millisecond) 130 | {date_time, rest} 131 | end 132 | 133 | defp do_decode( 134 | %Primitive{type: :bytes, metadata: %{"logicalType" => "decimal"} = metadata}, 135 | %Context{} = context, 136 | data, 
137 | opts 138 | ) 139 | when is_binary(data) do 140 | scale = Map.get(metadata, "scale", 0) 141 | {bytes, rest} = do_decode(%Primitive{type: :bytes}, context, data, opts) 142 | 143 | size = bit_size(bytes) 144 | <<unscaled::big-signed-integer-size(size)>> = bytes # whole payload is one signed big-endian integer 145 | 146 | number = 147 | if :exact == Keyword.get(opts, :decimals) do 148 | # avoid undefined cross reference for optional dependency 149 | decimal = Decimal 150 | decimal.new(if(unscaled >= 0, do: 1, else: -1), abs(unscaled), -scale) 151 | else 152 | unscaled * :math.pow(10, -scale) 153 | end 154 | 155 | {number, rest} 156 | end 157 | 158 | defp do_decode(%Primitive{type: :long}, %Context{}, data, _) when is_binary(data) do 159 | {val, rest} = variable_integer_decode(data, 0, 0, 64) 160 | {zigzag_decode(val), rest} 161 | end 162 | 163 | defp do_decode(%Primitive{type: :float}, %Context{}, data, _) when is_binary(data) do 164 | <<float::little-float-size(32), rest::binary>> = data 165 | {float, rest} 166 | end 167 | 168 | defp do_decode(%Primitive{type: :double}, %Context{}, data, _) when is_binary(data) do 169 | <<float::little-float-size(64), rest::binary>> = data 170 | {float, rest} 171 | end 172 | 173 | defp do_decode(%Primitive{type: :bytes}, %Context{} = context, data, opts) when is_binary(data) do 174 | {byte_count, buffer} = do_decode(%Primitive{type: :long}, context, data, opts) 175 | bit_count = byte_count * 8 176 | 177 | <<bytes::bitstring-size(bit_count), rest::bitstring>> = buffer 178 | 179 | {bytes, rest} 180 | end 181 | 182 | defp do_decode(%Primitive{type: :string}, %Context{} = context, data, opts) when is_binary(data) do 183 | {str, rest} = do_decode(%Primitive{type: :bytes}, context, data, opts) 184 | 185 | if String.valid?(str) do 186 | {str, rest} 187 | else 188 | error({:invalid_string, str}) 189 | end 190 | end 191 | 192 | defp do_decode(%Record{} = record, %Context{} = context, data, opts) when is_binary(data) do 193 | {decoded, buffer} = 194 | Enum.reduce(record.fields, {[], data}, fn field, {decoded, buffer} -> 195 | {val, buff} = do_decode(field, context, buffer, opts) 196 | {[val | decoded], buff} 197 | end) 198 | 199 | decoded_map = 200 | decoded 201
| |> Enum.reverse() 202 | |> Enum.zip(record.fields) 203 | |> Enum.map(fn {val, %Field{name: name}} -> 204 | {name, val} 205 | end) 206 | |> Map.new() 207 | 208 | {decoded_map, buffer} 209 | end 210 | 211 | defp do_decode(%Field{type: type}, %Context{} = context, data, opts) when is_binary(data) do 212 | do_decode(type, context, data, opts) 213 | end 214 | 215 | defp do_decode(%Union{possibilities: possibilities}, %Context{} = context, data, opts) 216 | when is_binary(data) do 217 | {index, index_rest} = do_decode(%Primitive{type: :long}, context, data, opts) 218 | schema = :lists.nth(index + 1, possibilities) 219 | 220 | {decoded_item, rest} = do_decode(schema, context, index_rest, opts) 221 | 222 | if Keyword.get(opts, :tagged_unions, false) and Map.has_key?(schema, :name) do 223 | {{schema.name, decoded_item}, rest} 224 | else 225 | {decoded_item, rest} 226 | end 227 | end 228 | 229 | defp do_decode(%Array{items: item_schema}, %Context{} = context, data, opts) when is_binary(data) do 230 | {count, buffer} = 231 | with {count, rest} when count < 0 <- 232 | do_decode(%Primitive{type: :long}, context, data, opts) do 233 | {_byte_size, buffer} = do_decode(%Primitive{type: :long}, context, rest, opts) 234 | {abs(count), buffer} 235 | end 236 | 237 | if count > 0 do 238 | {decoded_items, rest} = 239 | Enum.reduce(1..count, {[], buffer}, fn _, {decoded_items, buffer} -> 240 | {decoded_item, buffer} = do_decode(item_schema, context, buffer, opts) 241 | {[decoded_item | decoded_items], buffer} 242 | end) 243 | 244 | {Enum.reverse(decoded_items), String.slice(rest, 1..-1//1)} 245 | else 246 | {[], buffer} 247 | end 248 | end 249 | 250 | defp do_decode(%AvroEx.Schema.Map{values: value_schema}, %Context{} = context, data, opts) when is_binary(data) do 251 | {count, buffer} = do_decode(%Primitive{type: :long}, context, data, opts) 252 | string_schema = %Primitive{type: :string} 253 | 254 | if count > 0 do 255 | {decoded_values, rest} = 256 | Enum.reduce(1..count, {[], 
buffer}, fn _, {decoded_values, buffer} -> 257 | {decoded_key, buffer} = do_decode(string_schema, context, buffer, opts) 258 | {decoded_value, buffer} = do_decode(value_schema, context, buffer, opts) 259 | {[{decoded_key, decoded_value} | decoded_values], buffer} 260 | end) 261 | 262 | {Map.new(decoded_values), String.slice(rest, 1..-1//1)} 263 | else 264 | {%{}, buffer} 265 | end 266 | end 267 | 268 | defp do_decode(%AvroEx.Schema.Enum{symbols: symbols}, %Context{} = context, data, opts) when is_binary(data) do 269 | {index, rest} = do_decode(%Primitive{type: :long}, context, data, opts) 270 | {:lists.nth(index + 1, symbols), rest} 271 | end 272 | 273 | defp do_decode(%Fixed{size: size}, %Context{}, data, _) when is_binary(data) do 274 | <<fixed::binary-size(size), rest::binary>> = data 275 | {fixed, rest} 276 | end 277 | 278 | @doc false 279 | @spec zigzag_decode(integer()) :: integer() 280 | def zigzag_decode(int) do 281 | int 282 | |> Bitwise.bsr(1) 283 | |> Bitwise.bxor(-Bitwise.band(int, 1)) 284 | end 285 | 286 | @doc false 287 | @spec variable_integer_decode(bitstring(), integer(), integer(), integer()) :: {integer(), bitstring()} 288 | def variable_integer_decode(<<tag::1, value::7, tail::bitstring>>, acc, acc_bits, max_bits) do # tag is the continuation bit, value the 7 payload bits 289 | # assertion 290 | true = acc_bits < max_bits 291 | 292 | new_acc = 293 | value 294 | |> Bitwise.bsl(acc_bits) 295 | |> Bitwise.bor(acc) 296 | 297 | case tag do 298 | 0 -> {new_acc, tail} 299 | 1 -> variable_integer_decode(tail, new_acc, acc_bits + 7, max_bits) 300 | end 301 | end 302 | 303 | @compile {:inline, error: 1} 304 | defp error(error) do 305 | error |> AvroEx.DecodeError.new() |> throw() 306 | end 307 | end 308 | -------------------------------------------------------------------------------- /lib/avro_ex/decode_error.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.DecodeError do 2 | @moduledoc """ 3 | Exceptions in decoding Avro data 4 | """ 5 | 6 | defexception [:message] 7 | 8 | @type t :: %__MODULE__{} 9 | 10 | @spec new(tuple())
:: t() 11 | def new({:invalid_string, str}) do 12 | message = "Invalid UTF-8 string found #{inspect(str)}." 13 | %__MODULE__{message: message} 14 | end 15 | end 16 | -------------------------------------------------------------------------------- /lib/avro_ex/encode.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Encode do 2 | @moduledoc false 3 | 4 | require Bitwise 5 | 6 | alias AvroEx.EncodeError 7 | alias AvroEx.{Schema} 8 | alias AvroEx.Schema.{Array, Context, Fixed, Primitive, Record, Reference, Union} 9 | alias AvroEx.Schema.Enum, as: AvroEnum 10 | alias AvroEx.Schema.Record.Field 11 | 12 | @type reason :: term 13 | 14 | @seconds_in_day 24 * 60 * 60 15 | 16 | @doc false 17 | @spec encode(Schema.t(), term, keyword()) :: {:ok, AvroEx.encoded_avro()} | {:error, EncodeError.t() | Exception.t()} 18 | def encode(%Schema{context: %Context{} = context, schema: schema}, data, opts \\ []) do 19 | try do 20 | {:ok, do_encode(schema, context, data, opts)} 21 | catch 22 | :throw, %EncodeError{} = e -> {:error, e} 23 | end 24 | end 25 | 26 | defp do_encode(%Reference{type: type}, %Context{} = context, data, opts) do 27 | do_encode(Context.lookup(context, type), context, data, opts) 28 | end 29 | 30 | defp do_encode(%Primitive{type: :boolean}, %Context{}, true, _), do: <<1::8>> 31 | defp do_encode(%Primitive{type: :boolean}, %Context{}, false, _), do: <<0::8>> 32 | defp do_encode(%Primitive{type: :null}, %Context{}, nil, _), do: <<>> 33 | 34 | defp do_encode(%Primitive{type: :float}, %Context{}, float, _) when is_float(float), 35 | do: <<float::little-float-size(32)>> 36 | 37 | defp do_encode(%Primitive{type: :double}, %Context{}, double, _) when is_float(double), 38 | do: <<double::little-float-size(64)>> 39 | 40 | defp do_encode( 41 | %Primitive{type: :int, metadata: %{"logicalType" => "date"}} = schema, 42 | %Context{}, 43 | %Date{} = date, 44 | _ 45 | ) do 46 | date 47 | |> DateTime.new!(~T[00:00:00]) 48 | |> DateTime.to_unix(:second) 49 | |> Kernel.div(@seconds_in_day)
50 | |> encode_integer(schema) 51 | end 52 | 53 | defp do_encode( 54 | %Primitive{type: :long, metadata: %{"logicalType" => "timestamp-nanos"}} = schema, 55 | %Context{}, 56 | %DateTime{} = dt, 57 | _ 58 | ) do 59 | dt 60 | |> DateTime.to_unix(:nanosecond) 61 | |> encode_integer(schema) 62 | end 63 | 64 | defp do_encode( 65 | %Primitive{type: :long, metadata: %{"logicalType" => "timestamp-micros"}} = schema, 66 | %Context{}, 67 | %DateTime{} = dt, 68 | _ 69 | ) do 70 | dt 71 | |> DateTime.to_unix(:microsecond) 72 | |> encode_integer(schema) 73 | end 74 | 75 | defp do_encode( 76 | %Primitive{type: :long, metadata: %{"logicalType" => "timestamp-millis"}} = schema, 77 | %Context{}, 78 | %DateTime{} = dt, 79 | _ 80 | ) do 81 | dt 82 | |> DateTime.to_unix(:millisecond) 83 | |> encode_integer(schema) 84 | end 85 | 86 | defp do_encode( 87 | %Primitive{type: :long, metadata: %{"logicalType" => "time-micros"}} = schema, 88 | %Context{}, 89 | %Time{} = dt, 90 | _ 91 | ) do 92 | {:ok, midnight} = Time.new(0, 0, 0) 93 | 94 | dt 95 | |> Time.diff(midnight, :microsecond) 96 | |> encode_integer(schema) 97 | end 98 | 99 | defp do_encode( 100 | %Primitive{type: :int, metadata: %{"logicalType" => "time-millis"}} = schema, 101 | %Context{}, 102 | %Time{} = dt, 103 | _ 104 | ) do 105 | {:ok, midnight} = Time.new(0, 0, 0) 106 | 107 | dt 108 | |> Time.diff(midnight, :millisecond) 109 | |> encode_integer(schema) 110 | end 111 | 112 | defp do_encode( 113 | %Primitive{type: :bytes, metadata: %{"logicalType" => "decimal"} = metadata}, 114 | %Context{} = context, 115 | value, 116 | opts 117 | ) do 118 | scale = Map.get(metadata, "scale", 0) 119 | 120 | unscaled = 121 | case value do 122 | value when is_number(value) -> 123 | trunc(value / :math.pow(10, -scale)) 124 | 125 | %struct{} when struct == Decimal -> 126 | if value.exp != -scale do 127 | error({:incompatible_decimal, -scale, value.exp}) 128 | end 129 | 130 | value.coef * value.sign 131 | end 132 | 133 | number_of_bits = 
value_size(unscaled) 134 | 135 | bin = <<unscaled::big-signed-integer-size(number_of_bits)>> 136 | do_encode(%Primitive{type: :bytes}, context, bin, opts) 137 | end 138 | 139 | defp do_encode(%Primitive{type: :long} = schema, %Context{}, long, _) when is_integer(long) do 140 | encode_integer(long, schema) 141 | end 142 | 143 | defp do_encode(%Primitive{type: :int} = schema, %Context{}, integer, _) 144 | when is_integer(integer) do 145 | encode_integer(integer, schema) 146 | end 147 | 148 | defp do_encode(%Primitive{type: :string} = primitive, %Context{} = context, atom, opts) 149 | when is_atom(atom) and not (is_nil(atom) or is_boolean(atom)) do 150 | do_encode(primitive, context, to_string(atom), opts) 151 | end 152 | 153 | defp do_encode(%Primitive{type: :string}, %Context{} = context, str, opts) when is_binary(str) do 154 | if String.valid?(str) do 155 | do_encode(%Primitive{type: :bytes}, context, str, opts) 156 | else 157 | error({:invalid_string, str, context}) 158 | end 159 | end 160 | 161 | defp do_encode(%Primitive{type: :bytes}, %Context{} = context, bin, opts) when is_binary(bin) do 162 | byte_count = byte_size(bin) 163 | size = do_encode(%Primitive{type: :long}, context, byte_count, opts) 164 | size <> bin 165 | end 166 | 167 | defp do_encode(%Fixed{size: size}, %Context{}, bin, _) 168 | when is_binary(bin) and byte_size(bin) == size do 169 | bin 170 | end 171 | 172 | defp do_encode(%Fixed{} = fixed, %Context{} = context, bin, _) when is_binary(bin) do 173 | error({:incorrect_fixed_size, fixed, bin, context}) 174 | end 175 | 176 | defp do_encode(%Record{fields: fields}, %Context{} = context, record, opts) when is_map(record) do 177 | record = 178 | Map.new(record, fn 179 | {k, v} when is_binary(k) -> {k, v} 180 | {k, v} when is_atom(k) -> {to_string(k), v} 181 | end) 182 | 183 | Enum.map_join(fields, &do_encode(&1, context, record[&1.name], opts)) 184 | end 185 | 186 | defp do_encode(%Field{type: type, default: default}, %Context{} = context, nil, opts) do 187 | do_encode(type, context, default,
opts) 188 | end 189 | 190 | defp do_encode(%Field{type: type}, %Context{} = context, value, opts) do 191 | do_encode(type, context, value, opts) 192 | end 193 | 194 | defp do_encode(%Union{possibilities: possibilities} = schema, %Context{} = context, {name, value} = original, opts) do 195 | index = 196 | Enum.find_index(possibilities, fn 197 | %{name: ^name} = possible_schema -> Schema.encodable?(possible_schema, context, value) 198 | _ -> false 199 | end) 200 | 201 | do_encode_union(schema, context, value, original, index, opts) 202 | end 203 | 204 | defp do_encode(%Union{possibilities: possibilities} = schema, %Context{} = context, value, opts) do 205 | index = 206 | Enum.find_index(possibilities, fn possible_schema -> 207 | Schema.encodable?(possible_schema, context, value) 208 | end) 209 | 210 | do_encode_union(schema, context, value, value, index, opts) 211 | end 212 | 213 | defp do_encode(%AvroEx.Schema.Map{values: values}, %Context{} = context, map, opts) when is_map(map) do 214 | case map_size(map) do 215 | 0 -> 216 | <<0>> 217 | 218 | size -> 219 | acc = do_encode(%Primitive{type: :long}, context, size, opts) 220 | 221 | encoded_map = 222 | Enum.reduce(map, acc, fn {k, v}, acc -> 223 | key = do_encode(%Primitive{type: :string}, context, k, opts) 224 | value = do_encode(values, context, v, opts) 225 | 226 | acc <> key <> value 227 | end) 228 | 229 | encoded_map <> <<0>> 230 | end 231 | end 232 | 233 | defp do_encode(%Array{items: items}, %Context{} = context, data, opts) when is_list(data) do 234 | case length(data) do 235 | 0 -> 236 | <<0>> 237 | 238 | size -> 239 | array_payload = 240 | Enum.reduce(data, <<>>, fn v, acc -> 241 | value = do_encode(items, context, v, opts) 242 | 243 | acc <> value 244 | end) 245 | 246 | header = 247 | if Keyword.get(opts, :include_block_byte_size, false) do 248 | negated_count = do_encode(%Primitive{type: :long}, context, -1 * size, opts) 249 | byte_size = do_encode(%Primitive{type: :long}, context, 
byte_size(array_payload), opts) 250 | negated_count <> byte_size 251 | else 252 | do_encode(%Primitive{type: :long}, context, size, opts) 253 | end 254 | 255 | header <> array_payload <> <<0>> 256 | end 257 | end 258 | 259 | defp do_encode(%AvroEnum{} = enum, %Context{} = context, atom, opts) when is_atom(atom) do 260 | do_encode(enum, context, to_string(atom), opts) 261 | end 262 | 263 | defp do_encode(%AvroEnum{symbols: symbols} = enum, %Context{} = context, data, opts) when is_binary(data) do 264 | if data in symbols do 265 | index = Enum.find_index(symbols, fn e -> e == data end) 266 | do_encode(%Primitive{type: :long}, context, index, opts) 267 | else 268 | error({:invalid_symbol, enum, data, context}) 269 | end 270 | end 271 | 272 | defp do_encode(schema, context, data, _) do 273 | error({:schema_mismatch, schema, data, context}) 274 | end 275 | 276 | defp do_encode_union( 277 | %Union{possibilities: possibilities} = schema, 278 | %Context{} = context, 279 | value, 280 | original, 281 | index, 282 | opts 283 | ) do 284 | if index do 285 | schema = Enum.at(possibilities, index) 286 | 287 | do_encode(%Primitive{type: :int}, context, index, opts) <> do_encode(schema, context, value, opts) 288 | else 289 | error({:schema_mismatch, schema, original, context}) 290 | end 291 | end 292 | 293 | @doc false 294 | @spec zigzag_encode(Primitive.t(), integer) :: integer 295 | def zigzag_encode(%Primitive{type: :int}, int) when is_integer(int) do 296 | int 297 | |> Bitwise.bsl(1) 298 | |> Bitwise.bxor(Bitwise.bsr(int, 31)) 299 | end 300 | 301 | def zigzag_encode(%Primitive{type: :long}, long) when is_integer(long) do 302 | long 303 | |> Bitwise.bsl(1) 304 | |> Bitwise.bxor(Bitwise.bsr(long, 63)) 305 | end 306 | 307 | @doc false 308 | @spec variable_integer_encode(integer()) :: <<_::8, _::_*8>> 309 | def variable_integer_encode(value) when value <= 127, do: <<value>> 310 | 311 | def variable_integer_encode(value) do 312 | <<128 + Bitwise.band(value, 127)>> <>
variable_integer_encode(Bitwise.bsr(value, 7)) 313 | end 314 | 315 | defp encode_integer(int, schema) do 316 | schema 317 | |> zigzag_encode(int) 318 | |> variable_integer_encode 319 | end 320 | 321 | defp value_size(value, bits \\ 8) when is_number(value) do 322 | if value >= -Bitwise.bsl(1, bits - 1) and value < Bitwise.bsl(1, bits - 1) do # signed two's-complement range: one bit is the sign 323 | bits 324 | else 325 | value_size(value, bits + 8) 326 | end 327 | end 328 | 329 | @compile {:inline, error: 1} 330 | defp error(error) do 331 | error |> AvroEx.EncodeError.new() |> throw() 332 | end 333 | end 334 | -------------------------------------------------------------------------------- /lib/avro_ex/encode_error.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.EncodeError do 2 | @moduledoc """ 3 | Exceptions in encoding Avro data 4 | """ 5 | defexception [:message] 6 | 7 | @type t :: %__MODULE__{} 8 | 9 | @spec new(tuple()) :: t() 10 | def new({:schema_mismatch, schema, value, _context}) do 11 | type = AvroEx.Schema.type_name(schema) 12 | 13 | %__MODULE__{message: "Schema Mismatch: Expected value of #{type}, got #{inspect(value)}"} 14 | end 15 | 16 | def new({:invalid_string, str, _context}) do 17 | %__MODULE__{message: "Invalid string \"#{inspect(str)}\""} 18 | end 19 | 20 | def new({:invalid_symbol, enum, value, _context}) do 21 | type = AvroEx.Schema.type_name(enum) 22 | 23 | %__MODULE__{ 24 | message: "Invalid symbol for #{type}. Expected value in #{inspect(enum.symbols)}, got #{inspect(value)}" 25 | } 26 | end 27 | 28 | def new({:incorrect_fixed_size, fixed, binary, _context}) do 29 | type = AvroEx.Schema.type_name(fixed) 30 | 31 | %__MODULE__{ 32 | message: "Invalid size for #{type}.
Size of #{byte_size(binary)} for #{inspect(binary)}" 33 | } 34 | end 35 | 36 | def new({:incompatible_decimal, expected_scale, actual_scale}) do 37 | %__MODULE__{ 38 | message: "Incompatible decimal: expected scale #{expected_scale}, got #{actual_scale}" 39 | } 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /lib/avro_ex/schema.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema do 2 | use TypedStruct 3 | 4 | alias AvroEx.{Schema} 5 | alias AvroEx.Schema.Enum, as: AvroEnum 6 | alias AvroEx.Schema.Map, as: AvroMap 7 | alias AvroEx.Schema.Record.Field 8 | alias AvroEx.Schema.{Array, Context, Fixed, Primitive, Record, Reference, Union} 9 | 10 | @type schema_types :: 11 | Array.t() 12 | | AvroEnum.t() 13 | | Fixed.t() 14 | | AvroMap.t() 15 | | Record.t() 16 | | Primitive.t() 17 | | Union.t() 18 | | Reference.t() 19 | 20 | @type named_type :: 21 | AvroEnum.t() 22 | | Fixed.t() 23 | | Record.t() 24 | 25 | typedstruct do 26 | field :context, Context.t(), default: %Context{} 27 | field :schema, schema_types() 28 | end 29 | 30 | @type name :: String.t() 31 | @type namespace :: nil | String.t() 32 | @type full_name :: String.t() 33 | @type doc :: nil | String.t() 34 | @type metadata :: %{String.t() => String.t()} 35 | @type alias :: name 36 | 37 | @type json_schema :: String.t() 38 | 39 | @spec encodable?(AvroEx.Schema.t(), any()) :: boolean() 40 | def encodable?(%Schema{schema: schema, context: context}, data) do 41 | encodable?(schema, context, data) 42 | end 43 | 44 | @int32_range -2_147_483_648..2_147_483_647 45 | @int64_range -9_223_372_036_854_775_808..9_223_372_036_854_775_807 46 | 47 | @spec encodable?(any(), any(), any()) :: boolean() 48 | def encodable?(%Primitive{type: :null}, _, nil), do: true 49 | def encodable?(%Primitive{type: :boolean}, _, bool) when is_boolean(bool), do: true 50 | def encodable?(%Primitive{type: :int}, _, n) when is_integer(n) and
n in @int32_range, do: true 51 | def encodable?(%Primitive{type: :long}, _, n) when is_integer(n) and n in @int64_range, do: true 52 | 53 | def encodable?(%Primitive{type: :float}, _, n) when is_float(n) do 54 | match?(<<^n::little-float-size(32)>>, <<n::little-float-size(32)>>) # true only if n survives a 32-bit round trip unchanged 55 | end 56 | 57 | def encodable?(%Primitive{type: :double}, _, n) when is_float(n), do: true 58 | def encodable?(%Primitive{type: :bytes}, _, bytes) when is_binary(bytes), do: true 59 | def encodable?(%Primitive{type: :string}, _, str) when is_binary(str), do: String.valid?(str) 60 | 61 | def encodable?(%Primitive{type: :string}, _, atom) when is_atom(atom) do 62 | if is_nil(atom) or is_boolean(atom) do 63 | false 64 | else 65 | atom |> to_string() |> String.valid?() 66 | end 67 | end 68 | 69 | def encodable?(%Primitive{type: :long, metadata: %{"logicalType" => "timestamp-nanos"}}, _, %DateTime{}), do: true 70 | def encodable?(%Primitive{type: :long, metadata: %{"logicalType" => "timestamp-micros"}}, _, %DateTime{}), do: true 71 | def encodable?(%Primitive{type: :long, metadata: %{"logicalType" => "timestamp-millis"}}, _, %DateTime{}), do: true 72 | def encodable?(%Primitive{type: :long, metadata: %{"logicalType" => "time-micros"}}, _, %Time{}), do: true 73 | def encodable?(%Primitive{type: :int, metadata: %{"logicalType" => "time-millis"}}, _, %Time{}), do: true 74 | def encodable?(%Primitive{type: :int, metadata: %{"logicalType" => "date"}}, _, %Date{}), do: true 75 | 76 | def encodable?(%Record{} = record, %Context{} = context, data) when is_map(data), 77 | do: Record.match?(record, context, data) 78 | 79 | def encodable?(%Field{} = field, %Context{} = context, data), 80 | do: Field.match?(field, context, data) 81 | 82 | def encodable?(%Union{} = union, %Context{} = context, data), 83 | do: Union.match?(union, context, data) 84 | 85 | def encodable?(%Fixed{} = fixed, %Context{} = context, data), 86 | do: Fixed.match?(fixed, context, data) 87 | 88 | def encodable?(%AvroMap{} = schema, %Context{} = context, data)
when is_map(data) do 89 | AvroMap.match?(schema, context, data) 90 | end 91 | 92 | def encodable?(%Array{} = schema, %Context{} = context, data) when is_list(data) do 93 | Array.match?(schema, context, data) 94 | end 95 | 96 | def encodable?(%AvroEnum{} = schema, %Context{} = context, data) when is_atom(data) do 97 | AvroEnum.match?(schema, context, to_string(data)) 98 | end 99 | 100 | def encodable?(%AvroEnum{} = schema, %Context{} = context, data) when is_binary(data) do 101 | AvroEnum.match?(schema, context, data) 102 | end 103 | 104 | def encodable?(%Reference{type: name}, %Context{} = context, data) do 105 | schema = Context.lookup(context, name) 106 | encodable?(schema, context, data) 107 | end 108 | 109 | def encodable?(_, _, _), do: false 110 | 111 | @doc """ 112 | The namespace of the given Schema type 113 | 114 | ## Examples 115 | iex> namespace(%Primitive{type: :string}) 116 | nil 117 | 118 | iex> namespace(%Record{name: "MyRecord"}, "namespace") 119 | "namespace" 120 | 121 | iex> namespace(%Record{name: "MyRecord", namespace: "inner"}, "namespace") 122 | "inner" 123 | 124 | iex> namespace(%Record{name: "qualified.MyRecord", namespace: "inner"}, "namespace") 125 | "qualified" 126 | """ 127 | @spec namespace(schema_types(), namespace()) :: namespace() 128 | def namespace(schema, parent_namespace \\ nil) 129 | def namespace(%Record.Field{}, parent_namespace), do: parent_namespace 130 | 131 | def namespace(%{name: name, namespace: namespace}, parent_namespace) do 132 | split_name = split_name(name) 133 | 134 | cond do 135 | # if it has at least two values, its a fullname 136 | # e.g. 
"namespace.Name" would be `["namespace", "Name"]` 137 | match?([_, _ | _], split_name) -> 138 | split_name |> :lists.droplast() |> Enum.join(".") 139 | 140 | is_nil(namespace) -> 141 | parent_namespace 142 | 143 | true -> 144 | namespace 145 | end 146 | end 147 | 148 | def namespace(_schema, parent_namespace), do: parent_namespace 149 | 150 | @doc """ 151 | The fully-qualified name of the type 152 | 153 | 154 | ## Examples 155 | iex> full_name(%Primitive{type: "string"}) 156 | nil 157 | 158 | iex> full_name(%Record{name: "foo", namespace: "beam.community"}) 159 | "beam.community.foo" 160 | 161 | iex> full_name(%Record{name: "foo"}, "top.level.namespace") 162 | "top.level.namespace.foo" 163 | """ 164 | @spec full_name(schema_types() | name(), namespace()) :: nil | String.t() 165 | def full_name(schema, parent_namespace \\ nil) 166 | 167 | def full_name(%{name: name, namespace: namespace}, parent_namespace) do 168 | full_name(name, namespace || parent_namespace) 169 | end 170 | 171 | def full_name(%Record.Field{name: name}, _parent_namespace) do 172 | name 173 | end 174 | 175 | def full_name(name, namespace) when is_binary(name) do 176 | cond do 177 | is_nil(namespace) -> 178 | name 179 | 180 | String.contains?(name, ".") -> 181 | name 182 | 183 | true -> 184 | "#{namespace}.#{name}" 185 | end 186 | end 187 | 188 | def full_name(_name, _namespace), do: nil 189 | 190 | @doc """ 191 | The name of the schema type 192 | 193 | ## Examples 194 | 195 | iex> type_name(%Primitive{type: "string"}) 196 | "string" 197 | 198 | iex> type_name(%Primitive{type: :long, metadata: %{"logicalType" => "timestamp-millis"}}) 199 | "timestamp-millis" 200 | 201 | iex> type_name(%AvroEnum{name: "switch", symbols: []}) 202 | "Enum" 203 | 204 | iex> type_name(%Array{items: %Primitive{type: "int"}}) 205 | "Array" 206 | 207 | iex> type_name(%Fixed{size: 2, name: "double"}) 208 | "Fixed" 209 | 210 | iex> type_name(%Union{possibilities: [%Primitive{type: "string"}, %Primitive{type: "int"}]}) 211 | 
"Union" 212 | 213 | iex> type_name(%Record{name: "foo"}) 214 | "Record" 215 | 216 | iex> type_name(%Reference{type: "foo"}) 217 | "Reference" 218 | """ 219 | @spec type_name(schema_types()) :: String.t() 220 | def type_name(%Primitive{type: :null}), do: "null" 221 | def type_name(%Primitive{metadata: %{"logicalType" => type}}), do: type 222 | def type_name(%Primitive{type: type}), do: to_string(type) 223 | 224 | def type_name(%Array{items: type}), do: "Array" 225 | def type_name(%Union{possibilities: types}), do: "Union" 226 | def type_name(%Record{} = record), do: "Record" 227 | def type_name(%Reference{type: type}), do: "Reference" 228 | def type_name(%Record.Field{} = field), do: "Field" 229 | def type_name(%Fixed{size: size} = fixed), do: "Fixed" 230 | def type_name(%AvroEnum{} = enum), do: "Enum" 231 | def type_name(%AvroMap{values: values}), do: "Map" 232 | 233 | # split a full name into its parts 234 | defp split_name(string) do 235 | pattern = :binary.compile_pattern(".") 236 | String.split(string, pattern) 237 | end 238 | end 239 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/array.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Array do 2 | use TypedStruct 3 | 4 | alias AvroEx.{Schema, Schema.Context} 5 | 6 | typedstruct do 7 | field :items, Schema.schema_types(), enforce: true 8 | field :default, [Schema.schema_types()], default: [] 9 | field :metadata, Schema.metadata(), default: %{} 10 | end 11 | 12 | @spec match?(any(), any(), any()) :: boolean() 13 | def match?(%__MODULE__{items: item_type}, %Context{} = context, data) when is_list(data) do 14 | Enum.all?(data, fn item -> 15 | Schema.encodable?(item_type, context, item) 16 | end) 17 | end 18 | 19 | def match?(_array, _context, _data), do: false 20 | end 21 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/context.ex: 
-------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Context do 2 | use TypedStruct 3 | alias AvroEx.Schema 4 | 5 | typedstruct do 6 | field :names, %{String.t() => Schema.schema_types()}, default: %{} 7 | end 8 | 9 | @spec lookup(t(), String.t()) :: Schema.schema_types() | nil 10 | def lookup(%__MODULE__{names: names}, name) do 11 | names[name] 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/encoder.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Encoder do 2 | @moduledoc false 3 | 4 | require Jason.Helpers 5 | 6 | alias AvroEx.Schema 7 | alias AvroEx.Schema.Enum, as: AvroEnum 8 | alias AvroEx.Schema.Map, as: AvroMap 9 | alias AvroEx.Schema.{Array, Fixed, Primitive, Record, Record.Field, Reference, Union} 10 | 11 | @spec encode(Schema.t(), Keyword.t()) :: String.t() 12 | def encode(%Schema{schema: schema}, opts) do 13 | config = %{canonical?: Keyword.get(opts, :canonical, false), namespace: Schema.namespace(schema)} 14 | 15 | schema |> do_encode(config) |> Jason.encode!() 16 | end 17 | 18 | defp do_encode(%Primitive{} = primitive, config) do 19 | if config.canonical? 
do 20 | primitive.type 21 | else 22 | Map.put(primitive.metadata, :type, primitive.type) 23 | end 24 | end 25 | 26 | defp do_encode(%Reference{type: type}, _config) do 27 | type 28 | end 29 | 30 | defp do_encode(%Union{possibilities: possibilities}, config) do 31 | Enum.map(possibilities, &do_encode(&1, config)) 32 | end 33 | 34 | defp do_encode(binary, _config) when is_binary(binary), do: binary 35 | 36 | defp do_encode(struct, config) do 37 | process(struct, config) 38 | end 39 | 40 | defp process(%struct_type{} = struct, config) do 41 | config = update_in(config.namespace, &Schema.namespace(struct, &1)) 42 | 43 | data = 44 | for {k, v} <- extract(struct), not empty?(v), keep?(k, config), into: %{} do 45 | case k do 46 | k when k in [:values, :items, :type] -> {k, do_encode(v, config)} 47 | :fields -> {k, Enum.map(v, &do_encode(&1, config))} 48 | _ -> {k, v} 49 | end 50 | end 51 | 52 | if config.canonical? do 53 | full_name = Schema.full_name(struct, config.namespace) 54 | 55 | data 56 | |> Map.put(:name, full_name) 57 | |> Map.delete(:metadata) 58 | |> order_json_keys(struct_type) 59 | else 60 | merge_metadata(data) 61 | end 62 | end 63 | 64 | defp empty?([]), do: true 65 | defp empty?(nil), do: true 66 | defp empty?(""), do: true 67 | defp empty?(map) when map == %{}, do: true 68 | defp empty?(_), do: false 69 | 70 | defp keep?(k, %{canonical?: true}) do 71 | k in ~w(type name fields symbols items values size)a 72 | end 73 | 74 | defp keep?(_k, _config), do: true 75 | 76 | defp merge_metadata(%{metadata: _} = data) do 77 | {metadata, data} = Map.pop(data, :metadata) 78 | Map.merge(metadata, data) 79 | end 80 | 81 | defp merge_metadata(data), do: data 82 | 83 | defp extract(%struct{} = data) do 84 | type = 85 | case struct do 86 | AvroEnum -> "enum" 87 | AvroMap -> "map" 88 | Array -> "array" 89 | Fixed -> "fixed" 90 | Record -> "record" 91 | Field -> data.type 92 | end 93 | 94 | data |> Map.from_struct() |> Map.put(:type, type) 95 | end 96 | 97 | # name, type, 
fields, symbols, items, values, size 98 | defp order_json_keys(data, type) do 99 | case type do 100 | Array -> 101 | Jason.Helpers.json_map_take(data, [:type, :items]) 102 | 103 | AvroEnum -> 104 | Jason.Helpers.json_map_take(data, [:name, :type, :symbols]) 105 | 106 | AvroMap -> 107 | Jason.Helpers.json_map_take(data, [:type, :values]) 108 | 109 | Fixed -> 110 | Jason.Helpers.json_map_take(data, [:name, :type, :size]) 111 | 112 | Field -> 113 | Jason.Helpers.json_map_take(data, [:name, :type]) 114 | 115 | Record -> 116 | Jason.Helpers.json_map_take(data, [:name, :type, :fields]) 117 | end 118 | end 119 | end 120 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/enum.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Enum do 2 | use TypedStruct 3 | 4 | alias AvroEx.Schema 5 | alias AvroEx.Schema.Context 6 | 7 | typedstruct do 8 | field :aliases, [Schema.alias()], default: [] 9 | field :doc, Schema.doc() 10 | field :metadata, Schema.metadata(), default: %{} 11 | field :name, Schema.name(), enforce: true 12 | field :namespace, Schema.namespace() 13 | field :symbols, [String.t()], enforce: true 14 | end 15 | 16 | @spec match?(any(), any(), any()) :: boolean() 17 | def match?(%__MODULE__{symbols: symbols}, %Context{}, data) when is_binary(data) do 18 | data in symbols 19 | end 20 | 21 | def match?(_enum, _context, _data), do: false 22 | end 23 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/fixed.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Fixed do 2 | use TypedStruct 3 | 4 | alias AvroEx.Schema 5 | alias AvroEx.Schema.Context 6 | 7 | typedstruct do 8 | field :aliases, [Schema.alias()], default: [] 9 | field :metadata, Schema.metadata(), default: %{} 10 | field :name, String.t(), enforce: true 11 | field :doc, String.t() 12 | field 
:namespace, String.t() 13 | field :size, integer(), enforce: true 14 | end 15 | 16 | @spec match?(t, Context.t(), term) :: boolean 17 | def match?(%__MODULE__{size: size}, %Context{}, data) 18 | when is_binary(data) and byte_size(data) == size do 19 | true 20 | end 21 | 22 | def match?(_fixed, _context, _data), do: false 23 | end 24 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/map.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Map do 2 | use TypedStruct 3 | 4 | alias AvroEx.{Schema} 5 | alias AvroEx.Schema.{Context, Primitive} 6 | 7 | typedstruct do 8 | field :metadata, Schema.metadata(), default: %{} 9 | field :values, Schema.schema_types(), enforce: true 10 | field :default, map(), default: %{} 11 | end 12 | 13 | @spec match?(any(), any(), any()) :: boolean() 14 | def match?(%__MODULE__{values: value_type}, %Context{} = context, data) when is_map(data) do 15 | Enum.all?(data, fn {key, value} -> 16 | Schema.encodable?(%Primitive{type: :string}, context, key) and 17 | Schema.encodable?(value_type, context, value) 18 | end) 19 | end 20 | 21 | def match?(_map, _context, _data), do: false 22 | end 23 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/parser.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Parser do 2 | @moduledoc false 3 | 4 | alias AvroEx.Schema 5 | alias AvroEx.Schema.{Array, Context, Fixed, Primitive, Record, Reference, Union} 6 | alias AvroEx.Schema.Enum, as: AvroEnum 7 | alias AvroEx.Schema.Map, as: AvroMap 8 | 9 | @primitives [ 10 | :null, 11 | :boolean, 12 | :int, 13 | :long, 14 | :float, 15 | :double, 16 | :bytes, 17 | :string 18 | ] 19 | 20 | @str_primitives Enum.map(@primitives, &to_string/1) 21 | 22 | @spec primitives :: list(atom()) 23 | def primitives, do: @primitives 24 | 25 | @spec 
primitive?(String.t() | atom()) :: boolean() 26 | for p <- @primitives do 27 | def primitive?(unquote(p)), do: true 28 | def primitive?(unquote(to_string(p))), do: true 29 | end 30 | 31 | def primitive?(_), do: false 32 | 33 | @spec parse!(term(), Keyword.t()) :: Schema.t() 34 | def parse!(data, opts \\ []) do 35 | config = %{namespace: nil, strict?: Keyword.get(opts, :strict, false)} 36 | 37 | try do 38 | type = do_parse(data, config) 39 | context = build_context(type, %Context{}) 40 | 41 | %Schema{schema: type, context: context} 42 | catch 43 | :throw, %Schema.DecodeError{} = err -> raise err 44 | end 45 | end 46 | 47 | # do_parse_ref/1 handles types that might be a %Reference{} 48 | defp do_parse_ref(term, config) do 49 | if is_binary(term) and not primitive?(term) do 50 | term 51 | |> Schema.full_name(config.namespace) 52 | |> Reference.new() 53 | else 54 | do_parse(term, config) 55 | end 56 | end 57 | 58 | defp do_parse(nil, _config), do: %Primitive{type: :null} 59 | 60 | for p <- @primitives do 61 | defp do_parse(unquote(to_string(p)), _config) do 62 | %Primitive{type: unquote(p)} 63 | end 64 | end 65 | 66 | defp do_parse(list, config) when is_list(list) do 67 | {possibilities, _} = 68 | Enum.map_reduce(list, MapSet.new(), fn type, seen -> 69 | %struct{} = parsed = do_parse_ref(type, config) 70 | 71 | if match?(%Union{}, parsed) do 72 | error({:nested_union, parsed, list}) 73 | end 74 | 75 | set_key = 76 | case parsed do 77 | %{type: type} -> type 78 | %{name: name} -> {struct, name} 79 | %struct{} -> struct 80 | end 81 | 82 | if MapSet.member?(seen, set_key) do 83 | error({:duplicate_union_type, parsed, list}) 84 | end 85 | 86 | {parsed, MapSet.put(seen, set_key)} 87 | end) 88 | 89 | struct!(Union, possibilities: possibilities) 90 | end 91 | 92 | defp do_parse(%{"type" => primitive} = type, config) when primitive in @str_primitives do 93 | data = 94 | type 95 | |> cast(Primitive, []) 96 | |> drop([:type]) 97 | |> extract_metadata(config) 98 | 99 | 
struct!(Primitive, Map.put(data, :type, String.to_existing_atom(primitive))) 100 | end 101 | 102 | defp do_parse(%{"type" => "map"} = map, config) do 103 | data = 104 | map 105 | |> cast(AvroMap, [:values, :default]) 106 | |> validate_required([:values]) 107 | |> drop([:type]) 108 | |> extract_metadata(config) 109 | |> update_in([:values], &do_parse_ref(&1, config)) 110 | 111 | struct!(AvroMap, data) 112 | end 113 | 114 | defp do_parse(%{"type" => "enum", "symbols" => symbols} = enum, config) when is_list(symbols) do 115 | data = 116 | enum 117 | |> cast(AvroEnum, [:aliases, :doc, :name, :namespace, :symbols]) 118 | |> drop([:type]) 119 | |> validate_required([:name, :symbols]) 120 | |> validate_name() 121 | |> validate_namespace() 122 | |> validate_aliases() 123 | |> extract_metadata(config) 124 | 125 | # credo:disable-for-lines:11 Credo.Check.Warning.UnusedEnumOperation 126 | Enum.reduce(symbols, MapSet.new(), fn symbol, set -> 127 | if MapSet.member?(set, symbol) do 128 | error({:duplicate_symbol, symbol, enum}) 129 | end 130 | 131 | unless valid_name?(symbol) do 132 | error({:invalid_name, {:symbols, symbol}, enum}) 133 | end 134 | 135 | MapSet.put(set, symbol) 136 | end) 137 | 138 | struct!(AvroEnum, data) 139 | end 140 | 141 | defp do_parse(%{"type" => "array"} = array, config) do 142 | data = 143 | array 144 | |> cast(Array, [:items, :default]) 145 | |> drop([:type]) 146 | |> validate_required([:items]) 147 | |> extract_metadata(config) 148 | |> update_in([:items], &do_parse_ref(&1, config)) 149 | 150 | struct!(Array, data) 151 | end 152 | 153 | defp do_parse(%{"type" => "fixed"} = fixed, config) do 154 | data = 155 | fixed 156 | |> cast(Fixed, [:aliases, :doc, :name, :namespace, :size]) 157 | |> drop([:type]) 158 | |> validate_required([:name, :size]) 159 | |> validate_integer(:size) 160 | |> validate_name() 161 | |> validate_namespace() 162 | |> validate_aliases() 163 | |> extract_metadata(config) 164 | 165 | struct!(Fixed, data) 166 | end 167 | 168 | defp 
do_parse(%{"type" => "record", "fields" => fields} = record, config) when is_list(fields) do 169 | data = 170 | record 171 | |> cast(Record, [:aliases, :doc, :name, :namespace, :fields]) 172 | |> drop([:type]) 173 | |> validate_required([:name, :fields]) 174 | |> validate_name() 175 | |> validate_namespace() 176 | |> validate_aliases() 177 | |> extract_metadata(config) 178 | 179 | config = Map.update!(config, :namespace, &Schema.namespace(data, &1)) 180 | 181 | struct!( 182 | Record, 183 | update_in(data[:fields], fn fields -> Enum.map(fields, &parse_fields(&1, config)) end) 184 | ) 185 | end 186 | 187 | defp do_parse(other, _config) do 188 | error({:invalid_format, other}) 189 | end 190 | 191 | defp parse_fields(%{"type" => type} = field, config) do 192 | data = 193 | field 194 | |> cast(Record.Field, [:aliases, :doc, :default, :name, :namespace, :order, :type]) 195 | |> validate_required([:name, :type]) 196 | |> validate_aliases() 197 | |> extract_metadata(config) 198 | |> put_in([:type], do_parse_ref(type, config)) 199 | 200 | struct!(Record.Field, data) 201 | end 202 | 203 | defp cast(data, type, keys) do 204 | info = {type, data} 205 | 206 | Enum.reduce(keys, {%{}, data, info}, fn key, {data, rest, info} -> 207 | case Map.pop(rest, to_string(key)) do 208 | {nil, rest} -> 209 | {data, rest, info} 210 | 211 | {value, rest} -> 212 | {Map.put(data, key, value), rest, info} 213 | end 214 | end) 215 | end 216 | 217 | defp validate_required({data, rest, {type, raw} = info}, keys) do 218 | Enum.each(keys, fn k -> 219 | unless data[k] do 220 | error({:missing_required, k, type, raw}) 221 | end 222 | end) 223 | 224 | {data, rest, info} 225 | end 226 | 227 | defp validate_field({data, _rest, _info} = input, field, func) do 228 | case Map.fetch(data, field) do 229 | {:ok, _value} -> 230 | # Only validate if it has the field 231 | func.(data[field]) 232 | 233 | :error -> 234 | :ok 235 | end 236 | 237 | input 238 | end 239 | 240 | defp validate_integer({_data, _rest, 
{_type, raw}} = input, field) do 241 | validate_field(input, field, fn value -> 242 | unless is_integer(value) do 243 | error({:invalid_type, {field, value}, %Primitive{type: :integer}, raw}) 244 | end 245 | end) 246 | end 247 | 248 | defp validate_name({_data, _rest, {_type, raw}} = input) do 249 | validate_field(input, :name, fn value -> 250 | unless valid_full_name?(value) do 251 | error({:invalid_name, {:name, value}, raw}) 252 | end 253 | end) 254 | end 255 | 256 | defp validate_aliases({_data, _rest, {_type, raw}} = input) do 257 | validate_field(input, :aliases, fn aliases -> 258 | unless is_list(aliases) and Enum.all?(aliases, &valid_full_name?/1) do 259 | error({:invalid_name, {:aliases, aliases}, raw}) 260 | end 261 | end) 262 | end 263 | 264 | defp validate_namespace({_data, _rest, {_type, raw}} = input) do 265 | validate_field(input, :namespace, fn value -> 266 | # From the specification: "A namespace is a dot-separated sequence of such 267 | # names. The empty string may also be used as a namespace to indicate the 268 | # null namespace. 269 | unless valid_full_name?(value) or value == "" do 270 | error({:invalid_name, {:namespace, value}, raw}) 271 | end 272 | end) 273 | end 274 | 275 | defp validate_default(%{default: default} = schema) when not is_nil(default) do 276 | case AvroEx.encode(%Schema{schema: schema, context: %Context{}}, schema.default) do 277 | {:ok, _data} -> :ok 278 | {:error, reason} -> error({:invalid_default, schema, reason}) 279 | end 280 | 281 | schema 282 | end 283 | 284 | defp validate_default(schema), do: schema 285 | 286 | defp extract_metadata({data, rest, {type, raw}}, config) do 287 | if config.strict? 
and drop_metadata(rest, type) != %{} do 288 | error({:unrecognized_fields, Map.keys(rest), type, raw}) 289 | else 290 | Map.put(data, :metadata, rest) 291 | end 292 | end 293 | 294 | # Drops known metadata fields 295 | defp drop_metadata(%{"logicalType" => logical} = data, type) when type in [Primitive, Fixed] do 296 | case logical do 297 | "decimal" -> Map.drop(data, ["logicalType", "precision", "scale"]) 298 | _ -> Map.delete(data, "logicalType") 299 | end 300 | end 301 | 302 | defp drop_metadata(data, _type), do: data 303 | 304 | defp drop({data, rest, info}, keys) do 305 | {data, Map.drop(rest, Enum.map(keys, &to_string/1)), info} 306 | end 307 | 308 | defp build_context(type, context, namespace \\ nil) 309 | 310 | defp build_context(type, context, namespace) do 311 | namespace = Schema.namespace(type, namespace) 312 | context = capture_context(type, context, namespace) 313 | 314 | type 315 | |> validate_default() 316 | |> do_build_context(context, namespace) 317 | end 318 | 319 | defp do_build_context(%Union{} = union, context, namespace) do 320 | build_inner_context(union, :possibilities, context, namespace) 321 | end 322 | 323 | defp do_build_context(%Record{} = record, context, namespace) do 324 | build_inner_context(record, :fields, context, namespace) 325 | end 326 | 327 | defp do_build_context(%Record.Field{} = field, context, namespace) do 328 | build_inner_context(field, :type, context, namespace) 329 | end 330 | 331 | defp do_build_context(%Array{} = array, context, namespace) do 332 | build_inner_context(array, :items, context, namespace) 333 | end 334 | 335 | defp do_build_context(%AvroMap{} = map, context, namespace) do 336 | build_inner_context(map, :values, context, namespace) 337 | end 338 | 339 | defp do_build_context(%Reference{} = ref, context, _namespace) do 340 | unless Map.has_key?(context.names, ref.type) do 341 | error({:missing_ref, ref, context}) 342 | end 343 | 344 | context 345 | end 346 | 347 | defp do_build_context(_schema, 
context, _namespace), do: context 348 | 349 | defp build_inner_context(type, field, context, namespace) do 350 | %{^field => inner} = type 351 | 352 | if is_list(inner) do 353 | Enum.reduce(inner, context, &build_context(&1, &2, namespace)) 354 | else 355 | build_context(inner, context, namespace) 356 | end 357 | end 358 | 359 | defp capture_context(%Record.Field{}, context, _namespace), do: context 360 | 361 | defp capture_context(%{name: _name} = schema, context, namespace) do 362 | name = Schema.full_name(schema, namespace) 363 | 364 | if Map.has_key?(context.names, name) do 365 | error({:duplicate_name, name, schema}) 366 | end 367 | 368 | if match?(%Record{}, schema) do 369 | # credo:disable-for-lines:8 Credo.Check.Warning.UnusedEnumOperation 370 | Enum.reduce(schema.fields, MapSet.new(), fn field, set -> 371 | if MapSet.member?(set, field.name) do 372 | error({:duplicate_name, field.name, schema}) 373 | end 374 | 375 | MapSet.put(set, field.name) 376 | end) 377 | end 378 | 379 | context = 380 | schema 381 | |> aliases(namespace) 382 | |> Enum.reduce(context, fn name, context -> 383 | put_context(context, name, schema) 384 | end) 385 | 386 | put_context(context, name, schema) 387 | end 388 | 389 | defp capture_context(_type, context, _namespace), do: context 390 | 391 | defp put_context(context, name, schema) do 392 | put_in(context.names[name], schema) 393 | end 394 | 395 | defp aliases(%{aliases: aliases, namespace: namespace}, parent_namespace) 396 | when is_list(aliases) do 397 | Enum.map(aliases, fn name -> 398 | Schema.full_name(name, namespace || parent_namespace) 399 | end) 400 | end 401 | 402 | defp aliases(_schema, _parent_namespace), do: [] 403 | 404 | defp error(info) do 405 | info |> AvroEx.Schema.DecodeError.new() |> throw() 406 | end 407 | 408 | defp valid_name?(name) when is_binary(name) do 409 | Regex.match?(~r/^[A-Za-z_][A-Za-z0-9_]*$/, name) 410 | end 411 | 412 | defp valid_name?(_), do: false 413 | 414 | defp valid_full_name?(name) when 
is_binary(name) do 415 | Regex.match?(~r/^[A-Za-z_](\.?[A-Za-z0-9_]+)*$/, name) 416 | end 417 | 418 | defp valid_full_name?(_), do: false 419 | end 420 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/primitive.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Primitive do 2 | @moduledoc """ 3 | Functions for handling primitive types in Avro schemas 4 | """ 5 | 6 | use TypedStruct 7 | 8 | alias AvroEx.{Schema} 9 | 10 | @type primitive :: 11 | :null 12 | | :boolean 13 | | :int 14 | | :long 15 | | :float 16 | | :double 17 | | :bytes 18 | | :string 19 | 20 | typedstruct do 21 | field :metadata, Schema.metadata(), default: %{} 22 | field :type, primitive(), enforce: true 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/record.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Record do 2 | use TypedStruct 3 | 4 | alias AvroEx.{Schema, Schema.Context, Schema.Record} 5 | 6 | typedstruct do 7 | field :aliases, [Schema.alias()], default: [] 8 | field :doc, String.t() 9 | field :name, String.t(), enforce: true 10 | field :namespace, String.t() 11 | field :metadata, Schema.metadata(), default: %{} 12 | field :fields, [Record.Field.t()], default: [] 13 | end 14 | 15 | @spec match?(t, Context.t(), term) :: boolean 16 | def match?(%__MODULE__{fields: fields}, %Context{} = context, data) 17 | when is_map(data) and map_size(data) == length(fields) do 18 | Enum.all?(fields, fn %Record.Field{name: name} = field -> 19 | data = 20 | Map.new(data, fn 21 | {k, v} when is_binary(k) -> {k, v} 22 | {k, v} when is_atom(k) -> {to_string(k), v} 23 | end) 24 | 25 | Map.has_key?(data, name) and Schema.encodable?(field, context, data[name]) 26 | end) 27 | end 28 | 29 | def match?(_record, _context, _data), do: false 30 | end 31 | 
-------------------------------------------------------------------------------- /lib/avro_ex/schema/record/field.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Record.Field do 2 | use TypedStruct 3 | 4 | alias AvroEx.{Schema} 5 | alias AvroEx.Schema.Context 6 | 7 | typedstruct do 8 | field :name, String.t(), enforce: true 9 | field :doc, String.t() 10 | field :type, Schema.schema_types(), enforce: true 11 | field :default, Schema.schema_types() 12 | field :aliases, [Schema.alias()], default: [] 13 | field :metadata, Schema.metadata(), default: %{} 14 | end 15 | 16 | @spec match?(AvroEx.Schema.Record.Field.t(), AvroEx.Schema.Context.t(), any()) :: boolean() 17 | def match?(%__MODULE__{type: type}, %Context{} = context, data) do 18 | Schema.encodable?(type, context, data) 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/reference.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Reference do 2 | defstruct [:type] 3 | 4 | @type t :: %__MODULE__{} 5 | 6 | @spec new(String.t()) :: t() 7 | def new(type) when is_binary(type) do 8 | %__MODULE__{type: type} 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/schema_decode_error.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.DecodeError do 2 | defexception [:message] 3 | 4 | @type t :: %__MODULE__{} 5 | 6 | @spec new(tuple()) :: t() 7 | def new({:unrecognized_fields, keys, type, data}) do 8 | qualifier = 9 | case keys do 10 | [_] -> "key" 11 | _ -> "keys" 12 | end 13 | 14 | message = 15 | "Unrecognized schema #{qualifier} #{Enum.map_join(keys, ", ", &surround(&1))} for #{inspect(type)} in #{inspect(data)}" 16 | 17 | %__MODULE__{message: message} 18 | end 19 | 20 | def 
new({:missing_required, key, type, data}) do 21 | message = "Schema missing required key #{surround(key)} for #{inspect(type)} in #{inspect(data)}" 22 | %__MODULE__{message: message} 23 | end 24 | 25 | def new({:nested_union, nested, union}) do 26 | nested = AvroEx.Schema.type_name(nested) 27 | message = "Union contains nested union #{nested} as immediate child in #{inspect(union)}" 28 | %__MODULE__{message: message} 29 | end 30 | 31 | def new({:duplicate_union_type, schema, union}) do 32 | type = AvroEx.Schema.type_name(schema) 33 | message = "Union contains duplicated #{type} in #{inspect(union)}" 34 | %__MODULE__{message: message} 35 | end 36 | 37 | def new({:duplicate_symbol, symbol, enum}) do 38 | message = "Enum contains duplicated symbol #{surround(symbol)} in #{inspect(enum)}" 39 | %__MODULE__{message: message} 40 | end 41 | 42 | def new({:duplicate_name, name, schema}) do 43 | type = AvroEx.Schema.type_name(schema) 44 | message = "Duplicate name #{surround(name)} found in #{type}" 45 | %__MODULE__{message: message} 46 | end 47 | 48 | def new({:invalid_name, {field, name}, context}) do 49 | message = "Invalid name #{surround(name)} for #{surround(field)} in #{inspect(context)}" 50 | %__MODULE__{message: message} 51 | end 52 | 53 | def new({:invalid_default, schema, reason}) do 54 | type = AvroEx.Schema.type_name(schema) 55 | message = "Invalid default in #{type} #{Exception.message(reason)}" 56 | %__MODULE__{message: message} 57 | end 58 | 59 | def new({:invalid_type, {field, value}, type, context}) do 60 | type = AvroEx.Schema.type_name(type) 61 | message = "Expected #{surround(field)} to be #{type} got #{inspect(value)} in #{inspect(context)}" 62 | %__MODULE__{message: message} 63 | end 64 | 65 | def new({:invalid_format, data}) do 66 | message = "Invalid schema format #{inspect(data)}" 67 | %__MODULE__{message: message} 68 | end 69 | 70 | def new({:missing_ref, ref, context}) do 71 | known = 72 | if context.names == %{} do 73 | "empty" 74 | else 75 | 
context.names |> Map.keys() |> Enum.map_join(", ", &surround/1) 76 | end 77 | 78 | message = "Found undeclared reference #{surround(ref.type)}. Known references are #{known}" 79 | %__MODULE__{message: message} 80 | end 81 | 82 | defp surround(string, value \\ "`") do 83 | value <> to_string(string) <> value 84 | end 85 | end 86 | -------------------------------------------------------------------------------- /lib/avro_ex/schema/union.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Union do 2 | use TypedStruct 3 | 4 | alias AvroEx.{Schema, Schema.Context} 5 | 6 | typedstruct enforce: true do 7 | field :possibilities, [Schema.schema_types()], enforce: true 8 | end 9 | 10 | @spec match?(AvroEx.Schema.Union.t(), Context.t(), any()) :: boolean() 11 | def match?(%__MODULE__{} = union, %Context{} = context, data) do 12 | Enum.any?(union.possibilities, fn schema -> 13 | Schema.encodable?(schema, context, data) 14 | end) 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Mixfile do 2 | use Mix.Project 3 | 4 | @url "http://github.com/beam-community/avro_ex" 5 | @version "2.2.0" 6 | 7 | def project do 8 | [ 9 | app: :avro_ex, 10 | version: @version, 11 | elixir: "~> 1.6", 12 | build_embedded: Mix.env() == :prod, 13 | start_permanent: Mix.env() == :prod, 14 | aliases: aliases(), 15 | package: package(), 16 | elixirc_paths: elixirc_paths(Mix.env()), 17 | name: "AvroEx", 18 | description: "An Avro encoding/decoding library written in pure Elixir", 19 | docs: docs(), 20 | deps: deps() 21 | ] 22 | end 23 | 24 | def application do 25 | [extra_applications: [:logger]] 26 | end 27 | 28 | defp aliases do 29 | [] 30 | end 31 | 32 | defp deps do 33 | [ 34 | {:typed_struct, "~> 0.3.0", runtime: false}, 35 | {:jason, "~> 1.1"}, 36 | {:credo, "~> 1.0", only: 
:dev, runtime: false}, 37 | {:dialyxir, "~> 1.1", only: :dev, runtime: false}, 38 | {:ex_doc, "~> 0.20", only: :dev, runtime: false}, 39 | {:stream_data, "~> 0.5", only: [:dev, :test]}, 40 | {:decimal, "~> 2.0", optional: true} 41 | ] 42 | end 43 | 44 | defp docs do 45 | [ 46 | main: "AvroEx", 47 | source_url: @url, 48 | source_ref: "v#{@version}", 49 | groups_for_modules: [ 50 | Schema: ~r/Schema/ 51 | ], 52 | extras: [] 53 | ] 54 | end 55 | 56 | defp package do 57 | [ 58 | licenses: ["MIT"], 59 | maintainers: ["doomspork", "cjpoll", "davydog187"], 60 | links: %{"Github" => @url} 61 | ] 62 | end 63 | 64 | defp elixirc_paths(:test), do: ["lib", "test/support"] 65 | defp elixirc_paths(_), do: ["lib"] 66 | end 67 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, 3 | "credo": {:hex, :credo, "1.6.4", "ddd474afb6e8c240313f3a7b0d025cc3213f0d171879429bf8535d7021d9ad78", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2.8", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "c28f910b61e1ff829bffa056ef7293a8db50e87f2c57a9b5c3f57eee124536b7"}, 4 | "decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"}, 5 | "dialyxir": {:hex, :dialyxir, "1.1.0", "c5aab0d6e71e5522e77beff7ba9e08f8e02bad90dfbeffae60eaf0cb47e29488", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "07ea8e49c45f15264ebe6d5b93799d4dd56a44036cf42d0ad9c960bc266c0b9a"}, 6 | "earmark_parser": {:hex, 
:earmark_parser, "1.4.20", "89970db71b11b6b89759ce16807e857df154f8df3e807b2920a8c39834a9e5cf", [:mix], [], "hexpm", "1eb0d2dabeeeff200e0d17dc3048a6045aab271f73ebb82e416464832eb57bdd"}, 7 | "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, 8 | "ex_doc": {:hex, :ex_doc, "0.28.2", "e031c7d1a9fc40959da7bf89e2dc269ddc5de631f9bd0e326cbddf7d8085a9da", [:mix], [{:earmark_parser, "~> 1.4.19", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "51ee866993ffbd0e41c084a7677c570d0fc50cb85c6b5e76f8d936d9587fa719"}, 9 | "file_system": {:hex, :file_system, "0.2.10", "fb082005a9cd1711c05b5248710f8826b02d7d1784e7c3451f9c1231d4fc162d", [:mix], [], "hexpm", "41195edbfb562a593726eda3b3e8b103a309b733ad25f3d642ba49696bf715dc"}, 10 | "jason": {:hex, :jason, "1.3.0", "fa6b82a934feb176263ad2df0dbd91bf633d4a46ebfdffea0c8ae82953714946", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "53fc1f51255390e0ec7e50f9cb41e751c260d065dcba2bf0d08dc51a4002c2ac"}, 11 | "makeup": {:hex, :makeup, "1.1.0", "6b67c8bc2882a6b6a445859952a602afc1a41c2e08379ca057c0f525366fc3ca", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "0a45ed501f4a8897f580eabf99a2e5234ea3e75a4373c8a52824f6e873be57a6"}, 12 | "makeup_elixir": {:hex, :makeup_elixir, "0.15.2", "dc72dfe17eb240552857465cc00cce390960d9a0c055c4ccd38b70629227e97c", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.1", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "fd23ae48d09b32eff49d4ced2b43c9f086d402ee4fd4fcb2d7fad97fa8823e75"}, 13 | "makeup_erlang": {:hex, 
:makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"}, 14 | "nimble_parsec": {:hex, :nimble_parsec, "1.2.3", "244836e6e3f1200c7f30cb56733fd808744eca61fd182f731eac4af635cc6d0b", [:mix], [], "hexpm", "c8d789e39b9131acf7b99291e93dae60ab48ef14a7ee9d58c6964f59efb570b0"}, 15 | "stream_data": {:hex, :stream_data, "0.5.0", "b27641e58941685c75b353577dc602c9d2c12292dd84babf506c2033cd97893e", [:mix], [], "hexpm", "012bd2eec069ada4db3411f9115ccafa38540a3c78c4c0349f151fc761b9e271"}, 16 | "typed_struct": {:hex, :typed_struct, "0.3.0", "939789e3c1dca39d7170c87f729127469d1315dcf99fee8e152bb774b17e7ff7", [:mix], [], "hexpm", "c50bd5c3a61fe4e198a8504f939be3d3c85903b382bde4865579bc23111d1b6d"}, 17 | } 18 | -------------------------------------------------------------------------------- /test/avro_ex_test.exs: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Primitive.Test do 2 | use ExUnit.Case 3 | 4 | alias AvroEx.Schema 5 | alias AvroEx.Schema.{Primitive, Record, Record.Field, Reference, Union} 6 | 7 | doctest AvroEx 8 | 9 | describe "lookup" do 10 | test "looks up a named type" do 11 | schema_json = ~S(["null", {"type": "record", "namespace": "me.cjpoll", "name": "LinkedList", "fields": [ 12 | {"type": "int", "name": "value"}, 13 | {"type": ["null", "me.cjpoll.LinkedList"], "name": "next"} 14 | ]}]) 15 | 16 | assert %Schema{ 17 | schema: %Union{ 18 | possibilities: [ 19 | %Primitive{type: :null}, 20 | %Record{ 21 | name: "LinkedList", 22 | fields: [ 23 | %Field{name: "value", type: %Primitive{type: :int}}, 24 | %Field{ 25 | name: "next", 26 | type: %Union{ 27 | possibilities: [ 28 | %Primitive{type: :null}, 29 | %Reference{type: "me.cjpoll.LinkedList" = type} 30 | ] 31 | } 32 | } 33 | ] 34 | } = record 35 | ] 36 | }, 37 | context: 
context 38 | } = AvroEx.decode_schema!(schema_json) 39 | 40 | assert AvroEx.Schema.Context.lookup(context, type) == record 41 | end 42 | end 43 | 44 | describe "encode recursive" do 45 | test "can encode and decode a recursive type" do 46 | schema_json = ~S(["null", {"type": "record", "namespace": "me.cjpoll", "name": "LinkedList", "fields": [ 47 | {"type": "int", "name": "value"}, 48 | {"type": ["null", "me.cjpoll.LinkedList"], "name": "next"} 49 | ]}]) 50 | 51 | assert %Schema{ 52 | schema: %Union{ 53 | possibilities: [ 54 | %Primitive{type: :null}, 55 | %Record{ 56 | name: "LinkedList", 57 | fields: [ 58 | %Field{name: "value", type: %Primitive{type: :int}}, 59 | %Field{ 60 | name: "next", 61 | type: %Union{ 62 | possibilities: [ 63 | %Primitive{type: :null}, 64 | %Reference{type: "me.cjpoll.LinkedList"} 65 | ] 66 | } 67 | } 68 | ] 69 | } 70 | ] 71 | }, 72 | context: context 73 | } = schema = AvroEx.decode_schema!(schema_json) 74 | 75 | data = %{ 76 | "value" => 25, 77 | "next" => %{"value" => 23, "next" => %{"value" => 20, "next" => nil}} 78 | } 79 | 80 | assert context == 81 | %AvroEx.Schema.Context{ 82 | names: %{ 83 | "me.cjpoll.LinkedList" => %AvroEx.Schema.Record{ 84 | fields: [ 85 | %AvroEx.Schema.Record.Field{ 86 | name: "value", 87 | type: %AvroEx.Schema.Primitive{type: :int} 88 | }, 89 | %AvroEx.Schema.Record.Field{ 90 | name: "next", 91 | type: %AvroEx.Schema.Union{ 92 | possibilities: [ 93 | %AvroEx.Schema.Primitive{type: :null}, 94 | %AvroEx.Schema.Reference{type: "me.cjpoll.LinkedList"} 95 | ] 96 | } 97 | } 98 | ], 99 | name: "LinkedList", 100 | namespace: "me.cjpoll" 101 | } 102 | } 103 | } 104 | 105 | assert {:ok, avro} = AvroEx.encode(schema, data) 106 | assert {:ok, ^data} = AvroEx.decode(schema, avro) 107 | end 108 | end 109 | end 110 | -------------------------------------------------------------------------------- /test/decode_test.exs: -------------------------------------------------------------------------------- 1 | defmodule 
AvroEx.Decode.Test do 2 | use ExUnit.Case, async: true 3 | 4 | alias AvroEx.DecodeError 5 | 6 | describe "decode (primitive)" do 7 | test "null" do 8 | {:ok, schema} = AvroEx.decode_schema(~S("null")) 9 | {:ok, avro_message} = AvroEx.encode(schema, nil) 10 | assert {:ok, nil} = AvroEx.decode(schema, avro_message) 11 | end 12 | 13 | test "boolean" do 14 | {:ok, schema} = AvroEx.decode_schema(~S("boolean")) 15 | {:ok, true_message} = AvroEx.encode(schema, true) 16 | {:ok, false_message} = AvroEx.encode(schema, false) 17 | 18 | assert {:ok, true} = AvroEx.decode(schema, true_message) 19 | assert {:ok, false} = AvroEx.decode(schema, false_message) 20 | end 21 | 22 | test "integer" do 23 | {:ok, schema} = AvroEx.decode_schema(~S("int")) 24 | {:ok, zero} = AvroEx.encode(schema, 0) 25 | {:ok, neg_ten} = AvroEx.encode(schema, -10) 26 | {:ok, ten} = AvroEx.encode(schema, 10) 27 | {:ok, big} = AvroEx.encode(schema, 5_000_000) 28 | {:ok, small} = AvroEx.encode(schema, -5_000_000) 29 | {:ok, min_int32} = AvroEx.encode(schema, -2_147_483_648) 30 | {:ok, max_int32} = AvroEx.encode(schema, 2_147_483_647) 31 | 32 | assert {:ok, 0} = AvroEx.decode(schema, zero) 33 | assert {:ok, -10} = AvroEx.decode(schema, neg_ten) 34 | assert {:ok, 10} = AvroEx.decode(schema, ten) 35 | assert {:ok, 5_000_000} = AvroEx.decode(schema, big) 36 | assert {:ok, -5_000_000} = AvroEx.decode(schema, small) 37 | assert {:ok, -2_147_483_648} = AvroEx.decode(schema, min_int32) 38 | assert {:ok, 2_147_483_647} = AvroEx.decode(schema, max_int32) 39 | end 40 | 41 | test "long" do 42 | {:ok, schema} = AvroEx.decode_schema(~S("long")) 43 | {:ok, zero} = AvroEx.encode(schema, 0) 44 | {:ok, neg_ten} = AvroEx.encode(schema, -10) 45 | {:ok, ten} = AvroEx.encode(schema, 10) 46 | {:ok, big} = AvroEx.encode(schema, 2_147_483_647) 47 | {:ok, small} = AvroEx.encode(schema, -2_147_483_647) 48 | {:ok, min_int64} = AvroEx.encode(schema, -9_223_372_036_854_775_808) 49 | {:ok, max_int64} = AvroEx.encode(schema, 
9_223_372_036_854_775_807) 50 | 51 | assert {:ok, 0} = AvroEx.decode(schema, zero) 52 | assert {:ok, -10} = AvroEx.decode(schema, neg_ten) 53 | assert {:ok, 10} = AvroEx.decode(schema, ten) 54 | assert {:ok, 2_147_483_647} = AvroEx.decode(schema, big) 55 | assert {:ok, -2_147_483_647} = AvroEx.decode(schema, small) 56 | assert {:ok, -9_223_372_036_854_775_808} = AvroEx.decode(schema, min_int64) 57 | assert {:ok, 9_223_372_036_854_775_807} = AvroEx.decode(schema, max_int64) 58 | end 59 | 60 | test "float" do 61 | {:ok, schema} = AvroEx.decode_schema(~S("float")) 62 | {:ok, zero} = AvroEx.encode(schema, 0.0) 63 | {:ok, big} = AvroEx.encode(schema, 256.25) 64 | 65 | assert {:ok, 0.0} = AvroEx.decode(schema, zero) 66 | assert {:ok, 256.25} = AvroEx.decode(schema, big) 67 | end 68 | 69 | test "double" do 70 | {:ok, schema} = AvroEx.decode_schema(~S("double")) 71 | {:ok, zero} = AvroEx.encode(schema, 0.0) 72 | {:ok, big} = AvroEx.encode(schema, 256.25) 73 | 74 | assert {:ok, 0.0} = AvroEx.decode(schema, zero) 75 | assert {:ok, 256.25} = AvroEx.decode(schema, big) 76 | end 77 | 78 | test "bytes" do 79 | {:ok, schema} = AvroEx.decode_schema(~S("bytes")) 80 | {:ok, bytes} = AvroEx.encode(schema, <<222, 213, 194, 34, 58, 92, 95, 62>>) 81 | 82 | assert {:ok, <<222, 213, 194, 34, 58, 92, 95, 62>>} = AvroEx.decode(schema, bytes) 83 | end 84 | 85 | test "string" do 86 | {:ok, schema} = AvroEx.decode_schema(~S("string")) 87 | {:ok, bytes} = AvroEx.encode(schema, "Hello there 🕶") 88 | 89 | assert {:ok, "Hello there 🕶"} = AvroEx.decode(schema, bytes) 90 | end 91 | end 92 | 93 | describe "complex types" do 94 | test "record" do 95 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "record", "name": "MyRecord", "fields": [ 96 | {"type": "int", "name": "a"}, 97 | {"type": "int", "name": "b", "aliases": ["c", "d"]}, 98 | {"type": "string", "name": "e"} 99 | ]})) 100 | 101 | {:ok, encoded_message} = AvroEx.encode(schema, %{"a" => 1, "b" => 2, "e" => "Hello world!"}) 102 | 103 | assert 
{:ok, %{"a" => 1, "b" => 2, "e" => "Hello world!"}} = AvroEx.decode(schema, encoded_message) 104 | end 105 | 106 | test "union" do 107 | {:ok, schema} = AvroEx.decode_schema(~S(["null", "int"])) 108 | 109 | {:ok, encoded_null} = AvroEx.encode(schema, nil) 110 | {:ok, encoded_int} = AvroEx.encode(schema, 25) 111 | 112 | assert {:ok, nil} = AvroEx.decode(schema, encoded_null) 113 | assert {:ok, 25} = AvroEx.decode(schema, encoded_int) 114 | end 115 | 116 | test "union with DateTime" do 117 | {:ok, schema} = AvroEx.decode_schema(~S(["null", {"type": "long", "logicalType":"timestamp-micros"}])) 118 | datetime = DateTime.utc_now() 119 | 120 | {:ok, encoded_null} = AvroEx.encode(schema, nil) 121 | {:ok, encoded_datetime} = AvroEx.encode(schema, datetime) 122 | 123 | assert {:ok, nil} = AvroEx.decode(schema, encoded_null) 124 | assert {:ok, ^datetime} = AvroEx.decode(schema, encoded_datetime) 125 | end 126 | 127 | test "union with Time" do 128 | {:ok, schema} = AvroEx.decode_schema(~S(["null", {"type": "long", "logicalType":"time-micros"}])) 129 | time = Time.utc_now() 130 | 131 | {:ok, encoded_null} = AvroEx.encode(schema, nil) 132 | {:ok, encoded_time} = AvroEx.encode(schema, time) 133 | 134 | assert {:ok, nil} = AvroEx.decode(schema, encoded_null) 135 | assert {:ok, ^time} = AvroEx.decode(schema, encoded_time) 136 | end 137 | 138 | test "decode tagged named possibility" do 139 | record_json_factory = fn name -> 140 | ~s""" 141 | { 142 | "type": "record", 143 | "name": "#{name}", 144 | "fields": [ 145 | {"type": "string", "name": "value"} 146 | ] 147 | } 148 | """ 149 | end 150 | 151 | {:ok, schema} = AvroEx.decode_schema(~s([#{record_json_factory.("a")}, #{record_json_factory.("b")}])) 152 | 153 | {:ok, encoded_a} = AvroEx.encode(schema, {"a", %{"value" => "hello"}}) 154 | {:ok, encoded_b} = AvroEx.encode(schema, {"b", %{"value" => "hello"}}) 155 | 156 | assert {:ok, %{"value" => "hello"}} = AvroEx.decode(schema, encoded_a) 157 | assert {:ok, %{"value" => "hello"}} = 
AvroEx.decode(schema, encoded_b) 158 | 159 | assert {:ok, {"a", %{"value" => "hello"}}} = AvroEx.decode(schema, encoded_a, tagged_unions: true) 160 | assert {:ok, {"b", %{"value" => "hello"}}} = AvroEx.decode(schema, encoded_b, tagged_unions: true) 161 | end 162 | 163 | test "array with negative count" do 164 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "array", "items": ["null", "int"]})) 165 | {:ok, _long_schema} = AvroEx.decode_schema("long") 166 | 167 | {:ok, encoded_array} = AvroEx.encode(schema, [1, 2, 3, nil, 4, 5, nil], include_block_byte_size: true) 168 | 169 | assert {:ok, [1, 2, 3, nil, 4, 5, nil]} = AvroEx.decode(schema, encoded_array) 170 | end 171 | 172 | test "array" do 173 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "array", "items": ["null", "int"]})) 174 | 175 | {:ok, encoded_array} = AvroEx.encode(schema, [1, 2, 3, nil, 4, 5, nil]) 176 | 177 | assert {:ok, [1, 2, 3, nil, 4, 5, nil]} = AvroEx.decode(schema, encoded_array) 178 | end 179 | 180 | test "empty array" do 181 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "array", "items": ["null", "int"]})) 182 | 183 | {:ok, encoded_array} = AvroEx.encode(schema, []) 184 | 185 | assert {:ok, []} = AvroEx.decode(schema, encoded_array) 186 | end 187 | 188 | test "map" do 189 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": ["null", "int"]})) 190 | 191 | {:ok, encoded_array} = AvroEx.encode(schema, %{"a" => 1, "b" => nil, "c" => 3}) 192 | 193 | assert {:ok, %{"a" => 1, "b" => nil, "c" => 3}} = AvroEx.decode(schema, encoded_array) 194 | end 195 | 196 | test "empty map" do 197 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": ["null", "int"]})) 198 | 199 | {:ok, encoded_map} = AvroEx.encode(schema, %{}) 200 | 201 | assert {:ok, %{}} = AvroEx.decode(schema, encoded_map) 202 | end 203 | 204 | test "enum" do 205 | {:ok, schema} = 206 | AvroEx.decode_schema(~S({"type": "enum", "name": "Suit", "symbols": ["heart", "spade", "diamond", "club"]})) 207 | 208 | 
{:ok, club} = AvroEx.encode(schema, "club") 209 | {:ok, heart} = AvroEx.encode(schema, "heart") 210 | {:ok, diamond} = AvroEx.encode(schema, "diamond") 211 | {:ok, spade} = AvroEx.encode(schema, "spade") 212 | 213 | assert {:ok, "club"} = AvroEx.decode(schema, club) 214 | assert {:ok, "heart"} = AvroEx.decode(schema, heart) 215 | assert {:ok, "diamond"} = AvroEx.decode(schema, diamond) 216 | assert {:ok, "spade"} = AvroEx.decode(schema, spade) 217 | end 218 | 219 | test "fixed" do 220 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "fixed", "name": "SHA", "size": 40})) 221 | sha = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 222 | {:ok, encoded_sha} = AvroEx.encode(schema, sha) 223 | assert {:ok, ^sha} = AvroEx.decode(schema, encoded_sha) 224 | end 225 | 226 | test "record with empty array of records" do 227 | {:ok, schema} = AvroEx.decode_schema(~S( 228 | { 229 | "type": "record", 230 | "name": "User", 231 | "fields": [ 232 | { 233 | "name": "friends", 234 | "type": { 235 | "type": "array", 236 | "items": { 237 | "type": "record", 238 | "name": "Friend", 239 | "fields": [ 240 | { 241 | "name": "userId", 242 | "type": "string" 243 | } 244 | ] 245 | } 246 | } 247 | }, 248 | { 249 | "name": "username", 250 | "type": "string" 251 | } 252 | ] 253 | } 254 | )) 255 | 256 | {:ok, encoded} = AvroEx.encode(schema, %{"friends" => [], "username" => "iamauser"}) 257 | 258 | assert {:ok, %{"friends" => [], "username" => "iamauser"}} = AvroEx.decode(schema, encoded) 259 | end 260 | end 261 | 262 | describe "decode logical types" do 263 | test "date" do 264 | assert %AvroEx.Schema{} = schema = AvroEx.decode_schema!(%{"type" => "int", "logicalType" => "date"}) 265 | 266 | date1 = ~D[1970-01-01] 267 | assert {:ok, encoded} = AvroEx.encode(schema, date1) 268 | assert {:ok, ^date1} = AvroEx.decode(schema, encoded) 269 | 270 | date2 = ~D[1970-03-01] 271 | assert {:ok, encoded} = AvroEx.encode(schema, date2) 272 | assert {:ok, ^date2} = AvroEx.decode(schema, encoded) 273 | end 274 | 
275 | test "datetime micros" do 276 | now = DateTime.utc_now() 277 | 278 | {:ok, micro_schema} = AvroEx.decode_schema(~S({"type": "long", "logicalType":"timestamp-micros"})) 279 | 280 | {:ok, micro_encode} = AvroEx.encode(micro_schema, now) 281 | assert {:ok, ^now} = AvroEx.decode(micro_schema, micro_encode) 282 | end 283 | 284 | test "datetime millis" do 285 | now = DateTime.truncate(DateTime.utc_now(), :millisecond) 286 | 287 | {:ok, milli_schema} = AvroEx.decode_schema(~S({"type": "long", "logicalType":"timestamp-millis"})) 288 | 289 | {:ok, milli_encode} = AvroEx.encode(milli_schema, now) 290 | assert {:ok, ^now} = AvroEx.decode(milli_schema, milli_encode) 291 | end 292 | 293 | test "datetime nanos" do 294 | now = DateTime.utc_now() 295 | 296 | {:ok, nano_schema} = AvroEx.decode_schema(~S({"type": "long", "logicalType":"timestamp-nanos"})) 297 | 298 | {:ok, nano_encode} = AvroEx.encode(nano_schema, now) 299 | assert {:ok, ^now} = AvroEx.decode(nano_schema, nano_encode) 300 | end 301 | 302 | test "time micros" do 303 | now = Time.truncate(Time.utc_now(), :microsecond) 304 | 305 | {:ok, micro_schema} = AvroEx.decode_schema(~S({"type": "long", "logicalType":"time-micros"})) 306 | {:ok, micro_encode} = AvroEx.encode(micro_schema, now) 307 | assert {:ok, ^now} = AvroEx.decode(micro_schema, micro_encode) 308 | end 309 | 310 | test "time millis" do 311 | now = Time.truncate(Time.utc_now(), :millisecond) 312 | 313 | {:ok, milli_schema} = AvroEx.decode_schema(~S({"type": "int", "logicalType":"time-millis"})) 314 | {:ok, milli_encode} = AvroEx.encode(milli_schema, now) 315 | {:ok, time} = AvroEx.decode(milli_schema, milli_encode) 316 | 317 | assert Time.truncate(time, :millisecond) == now 318 | end 319 | 320 | test "decimal" do 321 | schema = "test/fixtures/decimal.avsc" |> File.read!() |> AvroEx.decode_schema!() 322 | # This reference file was encoded using avro's reference implementation: 323 | # 324 | # ```java 325 | # Conversions.DecimalConversion conversion = new 
Conversions.DecimalConversion(); 326 | # BigDecimal bigDecimal = new BigDecimal(valueInString); 327 | # return conversion.toBytes(bigDecimal, schema, logicalType); 328 | # ``` 329 | result = AvroEx.decode!(schema, File.read!("test/fixtures/decimal.avro"), decimals: :exact) 330 | 331 | assert result == %{ 332 | "decimalField1" => Decimal.new("1.23456789E-7"), 333 | "decimalField2" => Decimal.new("4.54545454545E-35"), 334 | "decimalField3" => Decimal.new("-111111111.1"), 335 | "decimalField4" => Decimal.new("5.3E-11") 336 | } 337 | 338 | result_approximate_values = AvroEx.decode!(schema, File.read!("test/fixtures/decimal.avro")) 339 | 340 | assert result_approximate_values == %{ 341 | "decimalField1" => 1.2345678900000002e-7, 342 | "decimalField2" => 4.54545454545e-35, 343 | "decimalField3" => -111_111_111.10000001, 344 | "decimalField4" => 5.3e-11 345 | } 346 | end 347 | end 348 | 349 | describe "DecodingError" do 350 | test "invalid utf string" do 351 | assert schema = AvroEx.decode_schema!("string") 352 | 353 | assert_raise DecodeError, "Invalid UTF-8 string found <<104, 101, 108, 108, 255>>.", fn -> 354 | AvroEx.decode!(schema, <<"\nhell", 0xFFFF::16>>) 355 | end 356 | end 357 | end 358 | end 359 | -------------------------------------------------------------------------------- /test/encode_test.exs: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Encode.Test do 2 | require __MODULE__.Macros 3 | alias __MODULE__.Macros 4 | use ExUnit.Case, async: true 5 | 6 | @test_module AvroEx.Encode 7 | 8 | describe "encode (primitive)" do 9 | test "null" do 10 | {:ok, schema} = AvroEx.decode_schema(~S("null")) 11 | 12 | assert {:ok, ""} = @test_module.encode(schema, nil) 13 | end 14 | 15 | test "boolean" do 16 | {:ok, schema} = AvroEx.decode_schema(~S("boolean")) 17 | 18 | assert {:ok, <<1::8>>} = @test_module.encode(schema, true) 19 | assert {:ok, <<0::8>>} = @test_module.encode(schema, false) 20 | end 21 | 22 | test 
"integer" do 23 | {:ok, schema} = AvroEx.decode_schema(~S("int")) 24 | 25 | assert {:ok, <<2::8>>} = @test_module.encode(schema, 1) 26 | end 27 | 28 | test "long" do 29 | {:ok, schema} = AvroEx.decode_schema(~S("long")) 30 | 31 | assert {:ok, <<2::8>>} = @test_module.encode(schema, 1) 32 | end 33 | 34 | test "float" do 35 | {:ok, schema} = AvroEx.decode_schema(~S("float")) 36 | 37 | assert {:ok, <<205, 204, 140, 63>>} = @test_module.encode(schema, 1.1) 38 | end 39 | 40 | test "double" do 41 | {:ok, schema} = AvroEx.decode_schema(~S("double")) 42 | 43 | assert {:ok, <<154, 153, 153, 153, 153, 153, 241, 63>>} = @test_module.encode(schema, 1.1) 44 | end 45 | 46 | test "bytes" do 47 | {:ok, schema} = AvroEx.decode_schema(~S("bytes")) 48 | 49 | assert {:ok, <<14, 97, 98, 99, 100, 101, 102, 103>>} = @test_module.encode(schema, "abcdefg") 50 | end 51 | 52 | test "string" do 53 | {:ok, schema} = AvroEx.decode_schema(~S("string")) 54 | 55 | assert {:ok, <<14, 97, 98, 99, 100, 101, 102, 103>>} = @test_module.encode(schema, "abcdefg") 56 | assert {:ok, <<14, 97, 98, 99, 100, 101, 102, 103>>} = @test_module.encode(schema, :abcdefg) 57 | 58 | assert {:error, %AvroEx.EncodeError{message: message}} = @test_module.encode(schema, nil) 59 | assert message == "Schema Mismatch: Expected value of string, got nil" 60 | 61 | assert {:error, %AvroEx.EncodeError{message: message}} = @test_module.encode(schema, true) 62 | assert message == "Schema Mismatch: Expected value of string, got true" 63 | 64 | assert {:error, %AvroEx.EncodeError{message: message}} = @test_module.encode(schema, false) 65 | assert message == "Schema Mismatch: Expected value of string, got false" 66 | end 67 | end 68 | 69 | describe "encode (logical types)" do 70 | test "date" do 71 | assert %AvroEx.Schema{} = schema = AvroEx.decode_schema!(%{"type" => "int", "logicalType" => "date"}) 72 | date1 = ~D[1970-01-01] 73 | assert {:ok, <<0>>} = AvroEx.encode(schema, date1) 74 | 75 | date2 = ~D[1970-03-01] 76 | assert {:ok, 
"v"} = AvroEx.encode(schema, date2) 77 | end 78 | 79 | test "decimal" do 80 | schema = "test/fixtures/decimal.avsc" |> File.read!() |> AvroEx.decode_schema!() 81 | 82 | payload = %{ 83 | "decimalField1" => Decimal.new("1.23456789E-7"), 84 | "decimalField2" => Decimal.new("4.54545454545E-35"), 85 | "decimalField3" => Decimal.new("-111111111.1"), 86 | "decimalField4" => Decimal.new("5.3E-11") 87 | } 88 | 89 | # Round-trip the encoder/decoder 90 | encoded = AvroEx.encode!(schema, payload) 91 | assert AvroEx.decode!(schema, encoded, decimals: :exact) == payload 92 | 93 | # This reference file was encoded using avro's reference implementation: 94 | # 95 | # ```java 96 | # Conversions.DecimalConversion conversion = new Conversions.DecimalConversion(); 97 | # BigDecimal bigDecimal = new BigDecimal(valueInString); 98 | # return conversion.toBytes(bigDecimal, schema, logicalType); 99 | # ``` 100 | assert encoded == File.read!("test/fixtures/decimal.avro") 101 | end 102 | 103 | test "decimal encoding error" do 104 | schema = "test/fixtures/decimal.avsc" |> File.read!() |> AvroEx.decode_schema!() 105 | 106 | payload = %{ 107 | "decimalField1" => Decimal.new("1E-10"), 108 | "decimalField2" => Decimal.new("0"), 109 | "decimalField3" => Decimal.new("0"), 110 | "decimalField4" => Decimal.new("0") 111 | } 112 | 113 | assert_raise(AvroEx.EncodeError, "Incompatible decimal: expected scale -15, got -10", fn -> 114 | AvroEx.encode!(schema, payload) 115 | end) 116 | end 117 | 118 | test "decimal without using the Decimal library" do 119 | schema = "test/fixtures/decimal.avsc" |> File.read!() |> AvroEx.decode_schema!() 120 | 121 | encoded = 122 | AvroEx.encode!(schema, %{ 123 | "decimalField1" => 1.23456789e-7, 124 | "decimalField2" => 4.54545454545e-35, 125 | "decimalField3" => -111_111_111.1, 126 | "decimalField4" => 5.3e-11 127 | }) 128 | 129 | # Without using decimals, the results are inevitably approximate 130 | assert AvroEx.decode!(schema, encoded) == %{ 131 | "decimalField1" => 
1.2345678800000002e-7, 132 | "decimalField2" => 4.54545454545e-35, 133 | "decimalField3" => -111_111_111.0, 134 | "decimalField4" => 5.3e-11 135 | } 136 | end 137 | end 138 | 139 | describe "variable_integer_encode" do 140 | Macros.assert_result(@test_module, :variable_integer_encode, [0], <<0>>) 141 | 142 | Macros.assert_result( 143 | @test_module, 144 | :variable_integer_encode, 145 | [1], 146 | <<1::size(8)>> 147 | ) 148 | 149 | Macros.assert_result( 150 | @test_module, 151 | :variable_integer_encode, 152 | [128], 153 | <<32_769::size(16)>> 154 | ) 155 | 156 | Macros.assert_result( 157 | @test_module, 158 | :variable_integer_encode, 159 | [16_383], 160 | <<65_407::size(16)>> 161 | ) 162 | 163 | Macros.assert_result( 164 | @test_module, 165 | :variable_integer_encode, 166 | [16_384], 167 | <<8_421_377::size(24)>> 168 | ) 169 | 170 | Macros.assert_result( 171 | @test_module, 172 | :variable_integer_encode, 173 | [4_294_967_041], 174 | <<129, 254, 255, 255, 15>> 175 | ) 176 | end 177 | 178 | describe "encode (record)" do 179 | test "works as expected with primitive fields" do 180 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "record", "name": "Record", "fields": [ 181 | {"type": "null", "name": "null"}, 182 | {"type": "boolean", "name": "bool"}, 183 | {"type": "int", "name": "integer"}, 184 | {"type": "long", "name": "long"}, 185 | {"type": "float", "name": "float"}, 186 | {"type": "double", "name": "double"}, 187 | {"type": "string", "name": "string"}, 188 | {"type": "bytes", "name": "bytes"} 189 | ]})) 190 | 191 | record = %{ 192 | "null" => nil, 193 | "bool" => true, 194 | "integer" => 25, 195 | "long" => 25, 196 | "float" => 2.5, 197 | "double" => 2.5, 198 | "string" => "abcdefg", 199 | "bytes" => "abcdefg" 200 | } 201 | 202 | {:ok, null_schema} = AvroEx.decode_schema(~S("null")) 203 | {:ok, boolean_schema} = AvroEx.decode_schema(~S("boolean")) 204 | {:ok, int_schema} = AvroEx.decode_schema(~S("int")) 205 | {:ok, long_schema} = 
AvroEx.decode_schema(~S("long")) 206 | {:ok, float_schema} = AvroEx.decode_schema(~S("float")) 207 | {:ok, double_schema} = AvroEx.decode_schema(~S("double")) 208 | {:ok, string_schema} = AvroEx.decode_schema(~S("string")) 209 | {:ok, bytes_schema} = AvroEx.decode_schema(~S("bytes")) 210 | 211 | assert {:ok, 212 | Enum.join([ 213 | elem(@test_module.encode(null_schema, record["null"]), 1), 214 | elem(@test_module.encode(boolean_schema, record["bool"]), 1), 215 | elem(@test_module.encode(int_schema, record["integer"]), 1), 216 | elem(@test_module.encode(long_schema, record["long"]), 1), 217 | elem(@test_module.encode(float_schema, record["float"]), 1), 218 | elem(@test_module.encode(double_schema, record["double"]), 1), 219 | elem(@test_module.encode(string_schema, record["string"]), 1), 220 | elem(@test_module.encode(bytes_schema, record["bytes"]), 1) 221 | ])} == @test_module.encode(schema, record) 222 | end 223 | 224 | test "works as expected with default values" do 225 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "record", "name": "Record", "fields": [ 226 | {"type": "null", "name": "null", "default": null}, 227 | {"type": "boolean", "name": "bool", "default": false}, 228 | {"type": "int", "name": "integer", "default": 0}, 229 | {"type": "long", "name": "long", "default": 0}, 230 | {"type": "float", "name": "float", "default": 0.0}, 231 | {"type": "double", "name": "double", "default": 0.0}, 232 | {"type": "string", "name": "string", "default": "ok"}, 233 | {"type": "bytes", "name": "bytes", "default": "ok"} 234 | ]})) 235 | 236 | assert {:ok, _encoded} = @test_module.encode(schema, %{}) 237 | end 238 | 239 | test "can encode records with atom keys and string values" do 240 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "record", "name": "Record", "fields": [ 241 | {"type": "string", "name": "first"}, 242 | {"type": "string", "name": "last"}, 243 | {"name": "meta", "type": { 244 | "name": "MetaRecord", 245 | "type": "record", 246 | "fields": [ 247 | 
{"type": "int", "name": "age"} 248 | ] 249 | }}]})) 250 | 251 | assert {:ok, "\bDave\nLucia@"} = 252 | @test_module.encode(schema, %{"first" => "Dave", "last" => "Lucia", "meta" => %{"age" => 32}}) 253 | 254 | assert {:ok, "\bDave\nLucia@"} = @test_module.encode(schema, %{first: "Dave", last: "Lucia", meta: %{age: 32}}) 255 | 256 | assert {:ok, "\bdave\nlucia@"} = @test_module.encode(schema, %{first: :dave, last: :lucia, meta: %{age: 32}}) 257 | end 258 | 259 | test "works as expected with default of null on union type" do 260 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "record", "name": "Record", "fields": [ 261 | {"type": ["null", "string"], "name": "maybe_null", "default": null} 262 | ]})) 263 | 264 | assert {:ok, <<0>>} = @test_module.encode(schema, %{}) 265 | assert {:ok, <<2, 2, 49>>} = @test_module.encode(schema, %{"maybe_null" => "1"}) 266 | end 267 | 268 | test "works with logicalType field values" do 269 | schema = 270 | AvroEx.decode_schema!(%{ 271 | "type" => "record", 272 | "name" => "Record", 273 | "fields" => [ 274 | %{"name" => "timestamp", "type" => %{"type" => "long", "logicalType" => "timestamp-millis"}} 275 | ] 276 | }) 277 | 278 | timestamp = ~U[2022-02-23 20:28:13.498428Z] 279 | 280 | assert {:ok, <<244, 132, 169, 132, 229, 95>>} = @test_module.encode(schema, %{timestamp: timestamp}) 281 | end 282 | end 283 | 284 | describe "encode (union)" do 285 | test "works as expected with nulls" do 286 | {:ok, schema} = AvroEx.decode_schema(~S(["null", "int"])) 287 | {:ok, null_schema} = AvroEx.decode_schema(~S("null")) 288 | {:ok, int_schema} = AvroEx.decode_schema(~S("int")) 289 | 290 | {:ok, index} = @test_module.encode(int_schema, 0) 291 | {:ok, encoded_null} = @test_module.encode(null_schema, nil) 292 | {:ok, encoded_union} = @test_module.encode(schema, nil) 293 | 294 | assert encoded_union == index <> encoded_null 295 | end 296 | 297 | test "works as expected with ints" do 298 | {:ok, schema} = AvroEx.decode_schema(~S(["null", "int"])) 299 | 
{:ok, int_schema} = AvroEx.decode_schema(~S("int")) 300 | 301 | {:ok, index} = @test_module.encode(int_schema, 1) 302 | {:ok, encoded_int} = @test_module.encode(int_schema, 2086) 303 | {:ok, encoded_union} = @test_module.encode(schema, 2086) 304 | 305 | assert encoded_union == index <> encoded_int 306 | end 307 | 308 | test "works as expected with int and long" do 309 | {:ok, schema} = AvroEx.decode_schema(~S(["int", "long"])) 310 | {:ok, int_schema} = AvroEx.decode_schema(~S("int")) 311 | {:ok, long_schema} = AvroEx.decode_schema(~S("long")) 312 | 313 | {:ok, index} = @test_module.encode(int_schema, 1) 314 | {:ok, encoded_long} = @test_module.encode(long_schema, -3_376_656_585_598_455_353) 315 | {:ok, encoded_union} = @test_module.encode(schema, -3_376_656_585_598_455_353) 316 | 317 | assert encoded_union == index <> encoded_long 318 | end 319 | 320 | test "works as expected with float and double" do 321 | {:ok, schema} = AvroEx.decode_schema(~S(["float", "double"])) 322 | {:ok, int_schema} = AvroEx.decode_schema(~S("int")) 323 | {:ok, double_schema} = AvroEx.decode_schema(~S("double")) 324 | 325 | {:ok, index} = @test_module.encode(int_schema, 1) 326 | {:ok, encoded_long} = @test_module.encode(double_schema, 0.0000000001) 327 | {:ok, encoded_union} = @test_module.encode(schema, 0.0000000001) 328 | 329 | assert encoded_union == index <> encoded_long 330 | end 331 | 332 | test "works as expected with logical types" do 333 | datetime_json = ~S({"type": "long", "logicalType":"timestamp-millis"}) 334 | datetime_value = ~U[2020-09-17 12:56:50.438Z] 335 | 336 | {:ok, schema} = AvroEx.decode_schema(~s(["null", #{datetime_json}])) 337 | {:ok, datetime_schema} = AvroEx.decode_schema(datetime_json) 338 | 339 | {:ok, index} = @test_module.encode(datetime_schema, 1) 340 | {:ok, encoded_datetime} = @test_module.encode(datetime_schema, datetime_value) 341 | {:ok, encoded_union} = @test_module.encode(schema, datetime_value) 342 | 343 | assert encoded_union == index <> 
encoded_datetime 344 | end 345 | 346 | test "works as expected with records" do 347 | record_json = ~S""" 348 | { 349 | "type": "record", 350 | "name": "MyRecord", 351 | "fields": [ 352 | {"type": "int", "name": "a"}, 353 | {"type": "string", "name": "b"} 354 | ] 355 | } 356 | """ 357 | 358 | json_schema = ~s(["null", #{record_json}]) 359 | 360 | {:ok, schema} = AvroEx.decode_schema(json_schema) 361 | {:ok, int_schema} = AvroEx.decode_schema(~S("int")) 362 | {:ok, record_schema} = AvroEx.decode_schema(record_json) 363 | 364 | {:ok, index} = @test_module.encode(int_schema, 1) 365 | {:ok, encoded_record} = @test_module.encode(record_schema, %{"a" => 25, "b" => "hello"}) 366 | {:ok, encoded_union} = @test_module.encode(schema, %{"a" => 25, "b" => "hello"}) 367 | 368 | assert encoded_union == index <> encoded_record 369 | end 370 | 371 | test "works as expected with union values tagged for a named possibility" do 372 | record_json_factory = fn name -> 373 | ~s""" 374 | { 375 | "type": "record", 376 | "name": "#{name}", 377 | "fields": [ 378 | {"type": "string", "name": "value"} 379 | ] 380 | } 381 | """ 382 | end 383 | 384 | json_schema = ~s([#{record_json_factory.("a")}, #{record_json_factory.("b")}]) 385 | 386 | {:ok, schema} = AvroEx.decode_schema(json_schema) 387 | {:ok, int_schema} = AvroEx.decode_schema(~S("int")) 388 | {:ok, record_schema} = AvroEx.decode_schema(record_json_factory.("b")) 389 | 390 | {:ok, index} = @test_module.encode(int_schema, 1) 391 | {:ok, encoded_record} = @test_module.encode(record_schema, %{"value" => "hello"}) 392 | {:ok, encoded_union} = @test_module.encode(schema, {"b", %{"value" => "hello"}}) 393 | 394 | assert encoded_union == index <> encoded_record 395 | end 396 | 397 | test "errors with a clear error for tagged unions" do 398 | record_json_factory = fn name -> 399 | ~s""" 400 | { 401 | "type": "record", 402 | "name": "#{name}", 403 | "fields": [ 404 | {"type": "string", "name": "value"} 405 | ] 406 | } 407 | """ 408 | end 409 | 
410 | json_schema = ~s([#{record_json_factory.("a")}, #{record_json_factory.("b")}]) 411 | 412 | {:ok, schema} = AvroEx.decode_schema(json_schema) 413 | 414 | assert {:error, 415 | %AvroEx.EncodeError{ 416 | message: 417 | "Schema Mismatch: Expected value of Union|Record>" <> 418 | ", got {\"c\", %{\"value\" => \"hello\"}}" 419 | }} = @test_module.encode(schema, {"c", %{"value" => "hello"}}) 420 | end 421 | 422 | test "errors if the data doesn't match the schema" do 423 | {:ok, schema} = AvroEx.decode_schema(~S(["null", "int"])) 424 | 425 | assert {:error, 426 | %AvroEx.EncodeError{ 427 | message: "Schema Mismatch: Expected value of Union, got \"wat\"" 428 | }} = @test_module.encode(schema, "wat") 429 | end 430 | end 431 | 432 | describe "encode (map)" do 433 | test "properly encodes the length, key-value pairs, and terminal byte" do 434 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": "int"})) 435 | assert {:ok, <<2, 12, 118, 97, 108, 117, 101, 49, 2, 0>>} = @test_module.encode(schema, %{"value1" => 1}) 436 | end 437 | 438 | test "can encode atom keys" do 439 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": "int"})) 440 | assert {:ok, <<2, 12, 118, 97, 108, 117, 101, 49, 2, 0>>} = @test_module.encode(schema, %{value1: 1}) 441 | end 442 | 443 | test "encodes an empty map" do 444 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": "int"})) 445 | assert {:ok, <<0>>} = @test_module.encode(schema, %{}) 446 | end 447 | end 448 | 449 | describe "encode (array)" do 450 | test "properly encodes an array with length, items, and terminal byte" do 451 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "array", "items": "int"})) 452 | assert {:ok, <<6, 2, 4, 6, 0>>} = @test_module.encode(schema, [1, 2, 3]) 453 | end 454 | 455 | test "properly encodes an array with length, byte_size, items, and terminal byte" do 456 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "array", "items": "int"})) 457 | assert {:ok, <<5, 6, 2, 4, 
6, 0>>} = @test_module.encode(schema, [1, 2, 3], include_block_byte_size: true) 458 | end 459 | 460 | test "encodes an empty array" do 461 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "array", "items": "int"})) 462 | assert {:ok, <<0>>} = @test_module.encode(schema, []) 463 | end 464 | end 465 | 466 | describe "encode (fixed)" do 467 | test "encodes the given value" do 468 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "fixed", "name": "sha", "size": 40})) 469 | sha = binary_of_size(40) 470 | assert {:ok, encoded} = @test_module.encode(schema, sha) 471 | assert encoded == sha 472 | end 473 | 474 | test "fails if the value is too large" do 475 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "fixed", "name": "sha", "size": 40})) 476 | bad_sha = binary_of_size(41) 477 | 478 | assert {:error, 479 | %AvroEx.EncodeError{ 480 | message: 481 | "Invalid size for Fixed. Size of 41 for \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"" 482 | }} = @test_module.encode(schema, bad_sha) 483 | end 484 | end 485 | 486 | describe "encode (enum)" do 487 | test "encodes the index of the symbol" do 488 | {:ok, enum_schema} = 489 | AvroEx.decode_schema(~S({"type": "enum", "name": "Suit", "symbols": ["heart", "spade", "diamond", "club"]})) 490 | 491 | {:ok, long_schema} = AvroEx.decode_schema(~S("long")) 492 | 493 | {:ok, heart_index} = @test_module.encode(long_schema, 0) 494 | {:ok, spade_index} = @test_module.encode(long_schema, 1) 495 | {:ok, diamond_index} = @test_module.encode(long_schema, 2) 496 | {:ok, club_index} = @test_module.encode(long_schema, 3) 497 | 498 | {:ok, heart} = @test_module.encode(enum_schema, "heart") 499 | {:ok, spade} = @test_module.encode(enum_schema, "spade") 500 | 501 | # Can handle atoms 502 | {:ok, diamond} = @test_module.encode(enum_schema, :diamond) 503 | {:ok, club} = @test_module.encode(enum_schema, :club) 504 | 505 | assert heart_index == heart 506 | assert spade_index == spade 507 | assert diamond_index == diamond 508 | assert club_index == 
club 509 | end 510 | end 511 | 512 | describe "EncodingError - schema mismatch" do 513 | test "(null)" do 514 | {:ok, schema} = AvroEx.decode_schema(~S("null")) 515 | 516 | assert {:error, 517 | %AvroEx.EncodeError{ 518 | message: "Schema Mismatch: Expected value of null, got :wat" 519 | }} = @test_module.encode(schema, :wat) 520 | end 521 | 522 | test "(int)" do 523 | {:ok, schema} = AvroEx.decode_schema(~S("int")) 524 | 525 | assert {:error, 526 | %AvroEx.EncodeError{ 527 | message: "Schema Mismatch: Expected value of int, got :wat" 528 | }} = @test_module.encode(schema, :wat) 529 | end 530 | 531 | test "(array)" do 532 | schema = AvroEx.decode_schema!(~S({"type": "array", "items": "int"})) 533 | 534 | assert {:error, 535 | %AvroEx.EncodeError{ 536 | message: "Schema Mismatch: Expected value of Array, got :wat" 537 | }} = @test_module.encode(schema, :wat) 538 | end 539 | 540 | test "(enum)" do 541 | schema = 542 | AvroEx.decode_schema!(~S({"type": "enum", "name": "Suit", "symbols": ["heart", "spade", "diamond", "club"]})) 543 | 544 | assert {:error, 545 | %AvroEx.EncodeError{ 546 | message: "Schema Mismatch: Expected value of Enum, got 12345" 547 | }} = @test_module.encode(schema, 12_345) 548 | end 549 | 550 | test "(fixed)" do 551 | schema = AvroEx.decode_schema!(~S({"type": "fixed", "name": "sha", "size": 40})) 552 | 553 | assert {:error, 554 | %AvroEx.EncodeError{ 555 | message: "Schema Mismatch: Expected value of Fixed, got 12345" 556 | }} = @test_module.encode(schema, 12_345) 557 | end 558 | 559 | test "(map)" do 560 | schema = AvroEx.decode_schema!(~S({"type": "map", "values": "int"})) 561 | 562 | assert {:error, 563 | %AvroEx.EncodeError{ 564 | message: "Schema Mismatch: Expected value of Map, got 12345" 565 | }} = @test_module.encode(schema, 12_345) 566 | end 567 | 568 | test "(record)" do 569 | assert schema = 570 | AvroEx.decode_schema!(~S({"type": "record", "namespace": "beam.community", "name": "Name", "fields": [ 571 | {"type": "string", "name": 
"first"}, 572 | {"type": "string", "name": "last"} 573 | ]})) 574 | 575 | assert {:error, 576 | %AvroEx.EncodeError{ 577 | message: "Schema Mismatch: Expected value of Record, got :wat" 578 | }} = @test_module.encode(schema, :wat) 579 | 580 | assert {:error, 581 | %AvroEx.EncodeError{ 582 | message: "Schema Mismatch: Expected value of string, got nil" 583 | }} = @test_module.encode(schema, %{}) 584 | end 585 | 586 | test "(reference)" do 587 | assert schema = 588 | AvroEx.decode_schema!(%{ 589 | "type" => "record", 590 | "namespace" => "beam.community", 591 | "name" => "Name", 592 | "fields" => [ 593 | %{ 594 | "name" => "first_name", 595 | "type" => %{ 596 | "type" => "record", 597 | "name" => "DefinedRecord", 598 | "fields" => [ 599 | %{ 600 | "type" => "string", 601 | "name" => "full" 602 | } 603 | ] 604 | } 605 | }, 606 | %{ 607 | "type" => "beam.community.DefinedRecord", 608 | "name" => "last_name" 609 | } 610 | ] 611 | }) 612 | 613 | assert {:error, 614 | %AvroEx.EncodeError{ 615 | message: "Schema Mismatch: Expected value of Record, got :wat" 616 | }} = @test_module.encode(schema, %{first_name: %{full: "foo"}, last_name: :wat}) 617 | 618 | assert {:error, 619 | %AvroEx.EncodeError{ 620 | message: "Schema Mismatch: Expected value of Record, got nil" 621 | }} = @test_module.encode(schema, %{}) 622 | end 623 | 624 | test "(reference with union)" do 625 | assert schema = 626 | AvroEx.decode_schema!(%{ 627 | "type" => "record", 628 | "namespace" => "beam.community", 629 | "name" => "Name", 630 | "fields" => [ 631 | %{ 632 | "name" => "first_name", 633 | "type" => %{ 634 | "type" => "record", 635 | "name" => "DefinedRecord", 636 | "fields" => [ 637 | %{ 638 | "type" => "string", 639 | "name" => "full" 640 | } 641 | ] 642 | } 643 | }, 644 | %{ 645 | "type" => ["null", "beam.community.DefinedRecord"], 646 | "name" => "last_name" 647 | } 648 | ] 649 | }) 650 | 651 | assert {:error, 652 | %AvroEx.EncodeError{ 653 | message: 654 | "Schema Mismatch: Expected value of 
Union>, got :wat" 655 | }} = @test_module.encode(schema, %{first_name: %{full: "foo"}, last_name: :wat}) 656 | end 657 | 658 | test "(union)" do 659 | schema = AvroEx.decode_schema!(~S(["null", "string"])) 660 | 661 | assert {:error, 662 | %AvroEx.EncodeError{ 663 | message: "Schema Mismatch: Expected value of Union, got 12345" 664 | }} = @test_module.encode(schema, 12_345) 665 | end 666 | end 667 | 668 | describe "EncodingError - Invalid Fixed size" do 669 | test "(fixed)" do 670 | schema = AvroEx.decode_schema!(~S({"type": "fixed", "name": "sha", "size": 40})) 671 | bad_sha = binary_of_size(39) 672 | 673 | assert {:error, 674 | %AvroEx.EncodeError{ 675 | message: 676 | "Invalid size for Fixed. Size of 39 for \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"" 677 | }} = @test_module.encode(schema, bad_sha) 678 | end 679 | end 680 | 681 | describe "EncodingError - Invalid Symbol" do 682 | test "(enum)" do 683 | schema = 684 | AvroEx.decode_schema!(~S({"type": "enum", "name": "Suit", "symbols": ["heart", "spade", "diamond", "club"]})) 685 | 686 | assert {:error, 687 | %AvroEx.EncodeError{ 688 | message: 689 | "Invalid symbol for Enum. 
Expected value in [\"heart\", \"spade\", \"diamond\", \"club\"], got \"joker\"" 690 | }} = @test_module.encode(schema, "joker") 691 | end 692 | end 693 | 694 | describe "EncodingError - Invalid string" do 695 | test "(fixed)" do 696 | schema = AvroEx.decode_schema!(~S("string")) 697 | 698 | assert {:error, 699 | %AvroEx.EncodeError{ 700 | message: "Invalid string \"<<255, 255>>\"" 701 | }} = @test_module.encode(schema, <<0xFFFF::16>>) 702 | end 703 | end 704 | 705 | @spec binary_of_size(integer, binary) :: binary 706 | def binary_of_size(size, bin \\ "") 707 | def binary_of_size(0, bin), do: bin 708 | def binary_of_size(size, bin), do: binary_of_size(size - 1, bin <> "a") 709 | end 710 | -------------------------------------------------------------------------------- /test/fixtures/decimal.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/beam-community/avro_ex/aaba741ef9558fb9e7893b034b7b25125bca8007/test/fixtures/decimal.avro -------------------------------------------------------------------------------- /test/fixtures/decimal.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "example.avro", 3 | "type": "record", 4 | "name": "decimalContainer", 5 | "fields": [ 6 | { 7 | "name": "decimalField1", 8 | "type": { 9 | "type": "bytes", 10 | "scale": 15, 11 | "precision": 11, 12 | "logicalType": "decimal" 13 | } 14 | }, 15 | { 16 | "name": "decimalField2", 17 | "type": { 18 | "type": "bytes", 19 | "scale": 46, 20 | "precision": 46, 21 | "logicalType": "decimal" 22 | } 23 | }, 24 | { 25 | "name": "decimalField3", 26 | "type": { 27 | "type": "bytes", 28 | "scale": 1, 29 | "precision": 46, 30 | "logicalType": "decimal" 31 | } 32 | }, 33 | { 34 | "name": "decimalField4", 35 | "type": { 36 | "type": "bytes", 37 | "scale": 12, 38 | "precision": 46, 39 | "logicalType": "decimal" 40 | } 41 | } 42 | ] 43 | } 44 | 
-------------------------------------------------------------------------------- /test/property_test.exs: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.PropertyTest do 2 | use ExUnit.Case, async: true 3 | use ExUnitProperties 4 | 5 | property "encode -> decode always returns back the initial data for the same schema" do 6 | check all schema <- schema(), 7 | data <- valid_data(schema), 8 | opts <- opts(), 9 | initial_size: 10 do 10 | json = Jason.encode!(schema, opts) 11 | {:ok, schema} = AvroEx.decode_schema(json) 12 | {:ok, encoded} = AvroEx.encode(schema, data) 13 | assert {:ok, ^data} = AvroEx.decode(schema, encoded) 14 | end 15 | end 16 | 17 | @spec opts() :: StreamData.t() 18 | def opts do 19 | frequency([ 20 | {4, constant([])}, 21 | {1, encoding_options()} 22 | ]) 23 | end 24 | 25 | defp encoding_options do 26 | uniq_list_of(member_of(include_block_byte_size: true), max_length: 1) 27 | end 28 | 29 | @spec schema() :: StreamData.t() 30 | def schema do 31 | sized(fn size -> schema_gen(size) end) 32 | end 33 | 34 | defp schema_gen(0), do: primitive() 35 | 36 | defp schema_gen(size) do 37 | frequency([ 38 | {4, primitive()}, 39 | {1, complex(size)} 40 | ]) 41 | end 42 | 43 | defp primitive do 44 | member_of([ 45 | "null", 46 | "boolean", 47 | "int", 48 | "long", 49 | "float", 50 | "double", 51 | "bytes", 52 | "string" 53 | ]) 54 | end 55 | 56 | defp complex(size) do 57 | one_of([ 58 | array(size), 59 | map(size), 60 | union(size) 61 | ]) 62 | end 63 | 64 | defp array(size) do 65 | gen all schema <- resize(schema(), div(size, 2)) do 66 | %{ 67 | type: "array", 68 | items: schema 69 | } 70 | end 71 | end 72 | 73 | defp map(size) do 74 | gen all schema <- resize(schema(), div(size, 2)) do 75 | %{ 76 | type: "map", 77 | values: schema 78 | } 79 | end 80 | end 81 | 82 | defp union(size) do 83 | gen all list <- 84 | schema() 85 | |> resize(div(size, 4)) 86 | |> filter(fn schema -> not is_list(schema) end) 87 | |> 
list_of(min_length: 1, max_length: 8) do 88 | Enum.uniq_by(list, fn 89 | %{type: _type, name: name} -> name 90 | %{type: type} -> type 91 | value -> value 92 | end) 93 | end 94 | end 95 | 96 | defp valid_data("null"), do: constant(nil) 97 | defp valid_data("boolean"), do: boolean() 98 | defp valid_data("int"), do: integer(-2_147_483_648..2_147_483_647) 99 | defp valid_data("long"), do: integer(-9_223_372_036_854_775_808..9_223_372_036_854_775_807) 100 | # Narrow generated floats to what fits in 32 bits by round-tripping them through a 32-bit binary. 101 | defp valid_data("float") do 102 | gen all float <- float(), 103 | match?(<<_float::big-float-size(32)>>, <<float::big-float-size(32)>>) do 104 | <<float::big-float-size(32)>> = <<float::big-float-size(32)>> 105 | float 106 | end 107 | end 108 | 109 | defp valid_data("double"), do: float() 110 | defp valid_data("bytes"), do: binary(min_length: 1) 111 | defp valid_data("string"), do: string(:printable) 112 | defp valid_data(%{type: "array", items: schema}), do: list_of(valid_data(schema)) 113 | defp valid_data(%{type: "map", values: schema}), do: map_of(valid_data("string"), valid_data(schema)) 114 | 115 | defp valid_data(union) when is_list(union) do 116 | union 117 | |> member_of() 118 | |> bind(&valid_data/1) 119 | end 120 | end 121 | -------------------------------------------------------------------------------- /test/schema_encoder_test.exs: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.EncoderTest do 2 | use ExUnit.Case, async: true 3 | 4 | describe "encode/2" do 5 | test "primitive" do 6 | input = "int" 7 | 8 | assert schema = AvroEx.decode_schema!(input) 9 | assert AvroEx.encode_schema(schema) == ~S({"type":"int"}) 10 | end 11 | 12 | test "logical types" do 13 | input = %{"type" => "int", "logicalType" => "date"} 14 | 15 | assert schema = AvroEx.decode_schema!(input) 16 | assert AvroEx.encode_schema(schema) == ~S({"type":"int","logicalType":"date"}) 17 | end 18 | 19 | test "enum" do 20 | input = %{"type" => "enum", "symbols" => ["a"], "name" => "cool"} 21 | 22 | assert schema = AvroEx.decode_schema!(input) 23 | assert 
AvroEx.encode_schema(schema) == ~S({"name":"cool","symbols":["a"],"type":"enum"}) 24 | 25 | all = %{ 26 | "type" => "enum", 27 | "symbols" => ["a"], 28 | "name" => "cool", 29 | "aliases" => ["alias"], 30 | "doc" => "docs", 31 | "extra" => "val", 32 | "namespace" => "namespace" 33 | } 34 | 35 | assert schema = AvroEx.decode_schema!(all) 36 | 37 | assert AvroEx.encode_schema(schema) == 38 | ~S({"aliases":["alias"],"doc":"docs","name":"cool","namespace":"namespace","symbols":["a"],"type":"enum","extra":"val"}) 39 | end 40 | 41 | test "map" do 42 | # primitive map 43 | input = %{"type" => "map", "values" => "int"} 44 | 45 | assert schema = AvroEx.decode_schema!(input) 46 | assert AvroEx.encode_schema(schema) == ~S({"type":"map","values":{"type":"int"}}) 47 | 48 | # complex map 49 | complex = %{"type" => "map", "values" => ["null", "int"]} 50 | 51 | assert schema = AvroEx.decode_schema!(complex) 52 | assert AvroEx.encode_schema(schema) == ~S({"type":"map","values":[{"type":"null"},{"type":"int"}]}) 53 | 54 | all = %{"type" => "map", "values" => "int", "default" => %{"a" => 1}, "extra" => "val"} 55 | 56 | assert schema = AvroEx.decode_schema!(all) 57 | assert AvroEx.encode_schema(schema) == ~S({"default":{"a":1},"type":"map","values":{"type":"int"},"extra":"val"}) 58 | end 59 | 60 | test "record" do 61 | # primitive record 62 | input = %{"type" => "record", "name" => "test", "fields" => [%{"name" => "a", "type" => "string"}]} 63 | 64 | assert schema = AvroEx.decode_schema!(input) 65 | 66 | assert AvroEx.encode_schema(schema) == 67 | "{\"fields\":[{\"name\":\"a\",\"type\":{\"type\":\"string\"}}],\"name\":\"test\",\"type\":\"record\"}" 68 | 69 | # Complex record 70 | complex = %{"type" => "record", "name" => "test", "fields" => [%{"name" => "a", "type" => ["int", "string"]}]} 71 | 72 | assert schema = AvroEx.decode_schema!(complex) 73 | 74 | assert AvroEx.encode_schema(schema) == 75 | 
"{\"fields\":[{\"name\":\"a\",\"type\":[{\"type\":\"int\"},{\"type\":\"string\"}]}],\"name\":\"test\",\"type\":\"record\"}" 76 | 77 | all = %{ 78 | "type" => "record", 79 | "name" => "all", 80 | "namespace" => "beam.community", 81 | "doc" => "docs!", 82 | "aliases" => ["a_map"], 83 | "extra" => "val", 84 | "fields" => [ 85 | %{ 86 | "name" => "one", 87 | "type" => "int", 88 | "doc" => "field", 89 | "default" => 1, 90 | "aliases" => ["first"], 91 | "meta" => "meta" 92 | } 93 | ] 94 | } 95 | 96 | assert schema = AvroEx.decode_schema!(all) 97 | 98 | assert AvroEx.encode_schema(schema) == 99 | "{\"aliases\":[\"a_map\"],\"doc\":\"docs!\",\"fields\":[{\"aliases\":[\"first\"],\"default\":1,\"doc\":\"field\",\"name\":\"one\",\"type\":{\"type\":\"int\"},\"meta\":\"meta\"}],\"name\":\"all\",\"namespace\":\"beam.community\",\"type\":\"record\",\"extra\":\"val\"}" 100 | end 101 | 102 | test "array" do 103 | # primitive array 104 | input = %{"type" => "array", "items" => "int"} 105 | 106 | assert schema = AvroEx.decode_schema!(input) 107 | assert AvroEx.encode_schema(schema) == ~S({"items":{"type":"int"},"type":"array"}) 108 | 109 | all = %{"type" => "array", "items" => "int", "default" => [1, 2, 3]} 110 | 111 | assert schema = AvroEx.decode_schema!(all) 112 | assert AvroEx.encode_schema(schema) == ~S({"default":[1,2,3],"items":{"type":"int"},"type":"array"}) 113 | end 114 | 115 | test "fixed" do 116 | # primitive fixed 117 | input = %{"type" => "fixed", "name" => "double", "size" => 2} 118 | 119 | assert schema = AvroEx.decode_schema!(input) 120 | assert AvroEx.encode_schema(schema) == ~S({"name":"double","size":2,"type":"fixed"}) 121 | 122 | all = %{ 123 | "type" => "fixed", 124 | "name" => "double", 125 | "namespace" => "beam.community", 126 | "aliases" => ["two"], 127 | "doc" => "docs", 128 | "size" => 2, 129 | "extra" => "val" 130 | } 131 | 132 | assert schema = AvroEx.decode_schema!(all) 133 | 134 | assert AvroEx.encode_schema(schema) == 135 | 
"{\"aliases\":[\"two\"],\"doc\":\"docs\",\"name\":\"double\",\"namespace\":\"beam.community\",\"size\":2,\"type\":\"fixed\",\"extra\":\"val\"}" 136 | end 137 | 138 | test "reference" do 139 | input = %{ 140 | "type" => "record", 141 | "name" => "LinkedList", 142 | "fields" => [ 143 | %{"name" => "value", "type" => "int"}, 144 | %{"name" => "next", "type" => ["null", "LinkedList"]} 145 | ] 146 | } 147 | 148 | assert schema = AvroEx.decode_schema!(input) 149 | 150 | assert AvroEx.encode_schema(schema) == 151 | "{\"fields\":[{\"name\":\"value\",\"type\":{\"type\":\"int\"}},{\"name\":\"next\",\"type\":[{\"type\":\"null\"},\"LinkedList\"]}],\"name\":\"LinkedList\",\"type\":\"record\"}" 152 | end 153 | 154 | test "union" do 155 | input = ["null", "int"] 156 | assert schema = AvroEx.decode_schema!(input) 157 | assert AvroEx.encode_schema(schema) == ~S([{"type":"null"},{"type":"int"}]) 158 | end 159 | 160 | test "complex" do 161 | input = %{ 162 | "type" => "record", 163 | "name" => "complex", 164 | "fields" => [ 165 | %{"name" => "a", "type" => ["null", %{"type" => "fixed", "name" => "double", "size" => 2}]}, 166 | %{"name" => "b", "type" => %{"type" => "map", "values" => "string"}} 167 | ] 168 | } 169 | 170 | assert schema = AvroEx.decode_schema!(input) 171 | 172 | assert AvroEx.encode_schema(schema) == 173 | "{\"fields\":[{\"name\":\"a\",\"type\":[{\"type\":\"null\"},{\"name\":\"double\",\"size\":2,\"type\":\"fixed\"}]},{\"name\":\"b\",\"type\":{\"type\":\"map\",\"values\":{\"type\":\"string\"}}}],\"name\":\"complex\",\"type\":\"record\"}" 174 | end 175 | end 176 | 177 | describe "canonical encoding" do 178 | test "it collapses primitives" do 179 | input = %{"type" => "int", "logicalType" => "date"} 180 | 181 | assert schema = AvroEx.decode_schema!(input) 182 | assert AvroEx.encode_schema(schema, canonical: true) == ~S("int") 183 | end 184 | 185 | test "it replaces names with full names, drops namespace" do 186 | input = %{ 187 | "type" => "record", 188 | "name" => 
"MyRecord", 189 | "namespace" => "beam.community", 190 | "fields" => [ 191 | %{"name" => "a", "type" => %{"name" => "MyFixed", "type" => "fixed", "size" => 10}}, 192 | %{ 193 | "name" => "b", 194 | "type" => %{"name" => "MyEnum", "type" => "enum", "namespace" => "java.community", "symbols" => ["one"]} 195 | } 196 | ] 197 | } 198 | 199 | assert schema = AvroEx.decode_schema!(input) 200 | 201 | assert AvroEx.encode_schema(schema, canonical: true) == 202 | ~S({"name":"beam.community.MyRecord","type":"record","fields":[{"name":"a","type":{"name":"beam.community.MyFixed","type":"fixed","size":10}},{"name":"b","type":{"name":"java.community.MyEnum","type":"enum","symbols":["one"]}}]}) 203 | end 204 | 205 | test "the order fields is name, type, fields, symbols, items, values, size" do 206 | input = %{ 207 | "type" => "record", 208 | "name" => "MyRecord", 209 | "namespace" => "beam.community", 210 | "fields" => [ 211 | %{"name" => "a", "type" => %{"name" => "MyFixed", "type" => "fixed", "size" => 10}}, 212 | %{ 213 | "name" => "b", 214 | "type" => %{"name" => "MyEnum", "type" => "enum", "namespace" => "java.community", "symbols" => ["one"]} 215 | }, 216 | %{"name" => "c", "type" => %{"type" => "map", "values" => "int"}}, 217 | %{"name" => "d", "type" => %{"type" => "array", "items" => "int"}}, 218 | %{"name" => "e", "type" => "int"} 219 | ] 220 | } 221 | 222 | assert schema = AvroEx.decode_schema!(input, strict: true) 223 | 224 | assert AvroEx.encode_schema(schema, canonical: true) == 225 | "{\"name\":\"beam.community.MyRecord\",\"type\":\"record\",\"fields\":[{\"name\":\"a\",\"type\":{\"name\":\"beam.community.MyFixed\",\"type\":\"fixed\",\"size\":10}},{\"name\":\"b\",\"type\":{\"name\":\"java.community.MyEnum\",\"type\":\"enum\",\"symbols\":[\"one\"]}},{\"name\":\"c\",\"type\":{\"type\":\"map\",\"values\":\"int\"}},{\"name\":\"d\",\"type\":{\"type\":\"array\",\"items\":\"int\"}},{\"name\":\"e\",\"type\":\"int\"}]}" 226 | end 227 | end 228 | end 229 | 
-------------------------------------------------------------------------------- /test/schema_parser_test.exs: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.ParserTest do 2 | use ExUnit.Case, async: true 3 | 4 | alias AvroEx.{Schema} 5 | alias AvroEx.Schema.{Array, Context, Fixed, Parser, Primitive, Record, Reference, Union} 6 | alias AvroEx.Schema.Enum, as: AvroEnum 7 | alias AvroEx.Schema.Map, as: AvroMap 8 | 9 | describe "primitives" do 10 | test "it can parse primitives" do 11 | for p <- Parser.primitives() do 12 | p_string = to_string(p) 13 | assert %Schema{schema: schema, context: context} = Parser.parse!(p_string) 14 | 15 | assert %Primitive{type: ^p} = schema 16 | assert context == %Context{names: %{}} 17 | end 18 | end 19 | 20 | test "it can parse complex primitives" do 21 | for p <- Parser.primitives() do 22 | p_string = to_string(p) 23 | assert %Schema{schema: schema, context: context} = Parser.parse!(%{"type" => p_string}) 24 | 25 | assert %Primitive{type: ^p} = schema 26 | assert context == %Context{names: %{}} 27 | end 28 | end 29 | 30 | test "it can parse complex primitives with additional fields" do 31 | for p <- Parser.primitives() do 32 | p_string = to_string(p) 33 | 34 | assert %Schema{schema: schema, context: context} = 35 | Parser.parse!(%{ 36 | "type" => p_string, 37 | "a" => 1, 38 | "logicalType" => "timestamp-millis", 39 | "name" => "complex" 40 | }) 41 | 42 | assert %Primitive{ 43 | type: ^p, 44 | metadata: %{"a" => 1, "logicalType" => "timestamp-millis", "name" => "complex"} 45 | } = schema 46 | 47 | assert context == %Context{names: %{}} 48 | end 49 | end 50 | 51 | test "invalid primitives raise a DecodeError" do 52 | assert_raise AvroEx.Schema.DecodeError, 53 | "Invalid schema format \"nope\"", 54 | fn -> 55 | Parser.parse!("nope") 56 | end 57 | 58 | message = "Invalid schema format %{\"type\" => \"nada\"}" 59 | 60 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 61 | 
Parser.parse!(%{"type" => "nada"}) 62 | end 63 | end 64 | end 65 | 66 | describe "records" do 67 | test "can decode simple records" do 68 | assert %Schema{schema: schema, context: context} = 69 | Parser.parse!(%{ 70 | "type" => "record", 71 | "name" => "kyc", 72 | "aliases" => ["first_last"], 73 | "namespace" => "beam.community", 74 | "fields" => [ 75 | %{"name" => "first", "type" => "string", "default" => "bob", "extra" => "val"}, 76 | %{"name" => "last", "type" => "string"} 77 | ] 78 | }) 79 | 80 | assert schema == %Record{ 81 | name: "kyc", 82 | namespace: "beam.community", 83 | aliases: ["first_last"], 84 | fields: [ 85 | %Record.Field{ 86 | name: "first", 87 | type: %Primitive{type: :string}, 88 | default: "bob", 89 | metadata: %{"extra" => "val"} 90 | }, 91 | %Record.Field{name: "last", type: %Primitive{type: :string}} 92 | ] 93 | } 94 | 95 | assert context == %Context{ 96 | names: %{ 97 | "beam.community.first_last" => schema, 98 | "beam.community.kyc" => schema 99 | } 100 | } 101 | end 102 | 103 | test "can have fields that are logicalTypes" do 104 | assert %Schema{schema: schema, context: context} = 105 | Parser.parse!(%{ 106 | "type" => "record", 107 | "name" => "analytics", 108 | "fields" => [ 109 | %{ 110 | "name" => "timestamp", 111 | "type" => %{"type" => "string", "logicalType" => "timestamp-millis"} 112 | } 113 | ] 114 | }) 115 | 116 | assert schema == %Record{ 117 | name: "analytics", 118 | fields: [ 119 | %Record.Field{ 120 | name: "timestamp", 121 | type: %Primitive{type: :string, metadata: %{"logicalType" => "timestamp-millis"}} 122 | } 123 | ] 124 | } 125 | 126 | assert context == %Context{names: %{"analytics" => schema}} 127 | end 128 | 129 | test "fields defaults must be valid" do 130 | message = "Invalid default in Field Schema Mismatch: Expected value of long, got \"wrong\"" 131 | 132 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 133 | Parser.parse!(%{ 134 | "type" => "record", 135 | "name" => "bad_default", 136 | "fields" => [ 
137 | %{"name" => "key", "type" => "long", "default" => "wrong"} 138 | ] 139 | }) 140 | end 141 | end 142 | 143 | test "creating a record without a name will raise" do 144 | message = 145 | "Schema missing required key `name` for AvroEx.Schema.Record in %{\"fields\" => [%{\"name\" => \"key\", \"type\" => \"long\"}], \"type\" => \"record\"}" 146 | 147 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 148 | Parser.parse!(%{ 149 | "type" => "record", 150 | "fields" => [ 151 | %{"name" => "key", "type" => "long"} 152 | ] 153 | }) 154 | end 155 | end 156 | 157 | test "names must be valid" do 158 | message = 159 | "Invalid name `123` for `name` in %{\"fields\" => [%{\"name\" => \"key\", \"type\" => \"long\"}], \"name\" => \"123\", \"type\" => \"record\"}" 160 | 161 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 162 | Parser.parse!(%{ 163 | "type" => "record", 164 | "name" => "123", 165 | "fields" => [ 166 | %{"name" => "key", "type" => "long"} 167 | ] 168 | }) 169 | end 170 | end 171 | 172 | test "field names must be unique" do 173 | message = "Duplicate name `key` found in Record" 174 | 175 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 176 | Parser.parse!(%{ 177 | "type" => "record", 178 | "name" => "duplicate_names", 179 | "fields" => [ 180 | %{"name" => "key", "type" => "long"}, 181 | %{"name" => "key", "type" => "long"} 182 | ] 183 | }) 184 | end 185 | end 186 | 187 | test "namespace must be valid" do 188 | message = 189 | "Invalid name `1invalid` for `namespace` in %{\"fields\" => [%{\"name\" => \"key\", \"type\" => \"long\"}], \"name\" => \"valid\", \"namespace\" => \"1invalid\", \"type\" => \"record\"}" 190 | 191 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 192 | Parser.parse!(%{ 193 | "type" => "record", 194 | "name" => "valid", 195 | "namespace" => "1invalid", 196 | "fields" => [ 197 | %{"name" => "key", "type" => "long"} 198 | ] 199 | }) 200 | end 201 | end 202 | 203 | test "namespace is valid if it's empty" do 204 | 
assert {:ok, _schema} = AvroEx.decode_schema(~S({ 205 | "type": "record", 206 | "name": "something", 207 | "namespace": "", 208 | "fields" : [{"name": "count", "type": {"type": "string"}}] 209 | })) 210 | end 211 | end 212 | 213 | describe "unions" do 214 | test "it can decode simple unions" do 215 | assert %Schema{schema: schema, context: context} = Parser.parse!(["null", "string"]) 216 | 217 | assert schema == %Union{ 218 | possibilities: [ 219 | %Primitive{type: :null}, 220 | %Primitive{type: :string} 221 | ] 222 | } 223 | 224 | assert context == %Context{} 225 | end 226 | 227 | test "cannot have duplicated unnamed types" do 228 | message = "Union contains duplicated string in [\"string\", \"int\", \"string\"]" 229 | 230 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 231 | Parser.parse!(["string", "int", "string"]) 232 | end 233 | end 234 | 235 | test "can contain duplicated types if they are named" do 236 | assert %Schema{schema: schema, context: context} = 237 | Parser.parse!([ 238 | %{"type" => "enum", "name" => "directions", "symbols" => ["east", "north", "south", "west"]}, 239 | %{"type" => "enum", "name" => "primary_colors", "symbols" => ["blue", "red", "yellow"]} 240 | ]) 241 | 242 | assert schema == %Union{ 243 | possibilities: [ 244 | %AvroEnum{name: "directions", symbols: ["east", "north", "south", "west"]}, 245 | %AvroEnum{name: "primary_colors", symbols: ["blue", "red", "yellow"]} 246 | ] 247 | } 248 | 249 | assert context == %Context{ 250 | names: %{ 251 | "directions" => %AvroEx.Schema.Enum{ 252 | name: "directions", 253 | symbols: ["east", "north", "south", "west"] 254 | }, 255 | "primary_colors" => %AvroEx.Schema.Enum{ 256 | name: "primary_colors", 257 | symbols: ["blue", "red", "yellow"] 258 | } 259 | } 260 | } 261 | end 262 | 263 | test "it can have children that are arrays" do 264 | assert %Schema{schema: schema, context: context} = 265 | Parser.parse!([ 266 | "null", 267 | %{"type" => "array", "items" => "int"} 268 | ]) 269 | 270 | 
assert schema == %Union{ 271 | possibilities: [ 272 | %Primitive{type: :null}, 273 | %Array{items: %Primitive{type: :int}} 274 | ] 275 | } 276 | 277 | assert context == %Context{} 278 | end 279 | 280 | test "it cannot have multiple array children" do 281 | message = 282 | "Union contains duplicated Array in [%{\"items\" => \"int\", \"type\" => \"array\"}, %{\"items\" => \"string\", \"type\" => \"array\"}]" 283 | 284 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 285 | Parser.parse!([ 286 | %{"type" => "array", "items" => "int"}, 287 | %{"type" => "array", "items" => "string"} 288 | ]) 289 | end 290 | end 291 | 292 | test "cannot have duplicated named types" do 293 | message = 294 | "Union contains duplicated Enum in [%{\"name\" => \"directions\", \"symbols\" => [\"east\", \"north\", \"south\", \"west\"], \"type\" => \"enum\"}, %{\"name\" => \"directions\", \"symbols\" => [\"blue\", \"red\", \"yellow\"], \"type\" => \"enum\"}]" 295 | 296 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 297 | Parser.parse!([ 298 | %{"type" => "enum", "name" => "directions", "symbols" => ["east", "north", "south", "west"]}, 299 | %{"type" => "enum", "name" => "directions", "symbols" => ["blue", "red", "yellow"]} 300 | ]) 301 | end 302 | end 303 | 304 | test "cannot be named at the top-level" do 305 | message = "Invalid schema format %{\"name\" => \"maybe_null\", \"type\" => [\"null\", \"string\"]}" 306 | 307 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 308 | Parser.parse!(%{"name" => "maybe_null", "type" => ["null", "string"]}) 309 | end 310 | end 311 | 312 | test "cannot have unions as direct children" do 313 | message = 314 | "Union contains nested union Union as immediate child in [\"string\", [\"null\", \"string\"]]" 315 | 316 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 317 | Parser.parse!(["string", ["null", "string"]]) 318 | end 319 | end 320 | end 321 | 322 | describe "enums" do 323 | test "can parse a basic enum" do 324 | assert 
%Schema{schema: schema, context: context} = 325 | Parser.parse!(%{ 326 | "type" => "enum", 327 | "name" => "directions", 328 | "namespace" => "beam.community", 329 | "extra" => "val", 330 | "symbols" => ["east", "north", "south", "west"] 331 | }) 332 | 333 | assert schema == %AvroEnum{ 334 | name: "directions", 335 | namespace: "beam.community", 336 | symbols: ["east", "north", "south", "west"], 337 | metadata: %{"extra" => "val"} 338 | } 339 | 340 | assert context == %Context{names: %{"beam.community.directions" => schema}} 341 | end 342 | 343 | test "cannot have duplicate symbols" do 344 | message = 345 | "Enum contains duplicated symbol `yes` in %{\"name\" => \"duplicate\", \"symbols\" => [\"yes\", \"no\", \"yes\"], \"type\" => \"enum\"}" 346 | 347 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 348 | Parser.parse!(%{ 349 | "type" => "enum", 350 | "name" => "duplicate", 351 | "symbols" => ["yes", "no", "yes"] 352 | }) 353 | end 354 | end 355 | 356 | test "must have a valid name" do 357 | message = 358 | "Invalid name `bang!` for `name` in %{\"name\" => \"bang!\", \"symbols\" => [\"one\"], \"type\" => \"enum\"}" 359 | 360 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 361 | Parser.parse!(%{ 362 | "type" => "enum", 363 | "name" => "bang!", 364 | "symbols" => ["one"] 365 | }) 366 | end 367 | end 368 | 369 | test "must have a valid namespace" do 370 | message = 371 | "Invalid name `.namespace` for `namespace` in %{\"name\" => \"name\", \"namespace\" => \".namespace\", \"symbols\" => [\"one\"], \"type\" => \"enum\"}" 372 | 373 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 374 | Parser.parse!(%{ 375 | "type" => "enum", 376 | "name" => "name", 377 | "namespace" => ".namespace", 378 | "symbols" => ["one"] 379 | }) 380 | end 381 | end 382 | 383 | test "symbols must by alphanumberic or underscores, and not start with a number" do 384 | message_1 = 385 | "Invalid name `1` for `symbols` in %{\"name\" => \"non_string\", \"symbols\" => [1], 
\"type\" => \"enum\"}" 386 | 387 | assert_raise AvroEx.Schema.DecodeError, message_1, fn -> 388 | Parser.parse!(%{ 389 | "type" => "enum", 390 | "name" => "non_string", 391 | "symbols" => [1] 392 | }) 393 | end 394 | 395 | message_2 = 396 | "Invalid name `abcABC!` for `symbols` in %{\"name\" => \"bad_name_1\", \"symbols\" => [\"abcABC!\"], \"type\" => \"enum\"}" 397 | 398 | assert_raise AvroEx.Schema.DecodeError, message_2, fn -> 399 | Parser.parse!(%{ 400 | "type" => "enum", 401 | "name" => "bad_name_1", 402 | "symbols" => ["abcABC!"] 403 | }) 404 | end 405 | 406 | message_3 = 407 | "Invalid name `1a` for `symbols` in %{\"name\" => \"bad_name_2\", \"symbols\" => [\"1a\"], \"type\" => \"enum\"}" 408 | 409 | assert_raise AvroEx.Schema.DecodeError, message_3, fn -> 410 | Parser.parse!(%{ 411 | "type" => "enum", 412 | "name" => "bad_name_2", 413 | "symbols" => ["1a"] 414 | }) 415 | end 416 | end 417 | end 418 | 419 | describe "arrays" do 420 | test "can parse basic arrays" do 421 | assert %Schema{schema: schema, context: context} = 422 | Parser.parse!(%{ 423 | "type" => "array", 424 | "items" => "string" 425 | }) 426 | 427 | assert schema == %Array{items: %Primitive{type: :string}, default: []} 428 | assert context == %Context{} 429 | end 430 | 431 | test "can have defaults" do 432 | assert %Schema{schema: schema, context: context} = 433 | Parser.parse!(%{ 434 | "type" => "array", 435 | "items" => "int", 436 | "default" => [1, 2, 3] 437 | }) 438 | 439 | assert schema == %Array{items: %Primitive{type: :int}, default: [1, 2, 3]} 440 | assert context == %Context{} 441 | end 442 | 443 | test "default must be a valid array of that type" do 444 | message_1 = "Invalid default in Array Schema Mismatch: Expected value of int, got \"one\"" 445 | 446 | assert_raise AvroEx.Schema.DecodeError, message_1, fn -> 447 | Parser.parse!(%{ 448 | "type" => "array", 449 | "items" => "int", 450 | "default" => ["one", "two", "three"] 451 | }) 452 | end 453 | 454 | message_2 = "Invalid 
default in Array Schema Mismatch: Expected value of Array, got 1" 455 | 456 | assert_raise AvroEx.Schema.DecodeError, message_2, fn -> 457 | Parser.parse!(%{ 458 | "type" => "array", 459 | "items" => "int", 460 | "default" => 1 461 | }) 462 | end 463 | end 464 | end 465 | 466 | describe "fixed" do 467 | test "can parse basic fixed" do 468 | assert %Schema{schema: schema, context: context} = 469 | Parser.parse!(%{ 470 | "name" => "double", 471 | "namespace" => "one.two.three", 472 | "doc" => "two numbers", 473 | "aliases" => ["dos_nums"], 474 | "type" => "fixed", 475 | "size" => 2, 476 | "extra" => "val" 477 | }) 478 | 479 | assert schema == %Fixed{ 480 | name: "double", 481 | namespace: "one.two.three", 482 | size: 2, 483 | doc: "two numbers", 484 | aliases: ["dos_nums"], 485 | metadata: %{"extra" => "val"} 486 | } 487 | 488 | assert context == %Context{ 489 | names: %{ 490 | "one.two.three.double" => schema, 491 | "one.two.three.dos_nums" => schema 492 | } 493 | } 494 | end 495 | 496 | test "must include size" do 497 | message_1 = 498 | "Schema missing required key `size` for AvroEx.Schema.Fixed in %{\"name\" => \"missing_size\", \"type\" => \"fixed\"}" 499 | 500 | assert_raise AvroEx.Schema.DecodeError, message_1, fn -> 501 | Parser.parse!(%{ 502 | "type" => "fixed", 503 | "name" => "missing_size" 504 | }) 505 | end 506 | 507 | message_2 = 508 | "Expected `size` to be integer got \"40\" in %{\"name\" => \"string_size\", \"size\" => \"40\", \"type\" => \"fixed\"}" 509 | 510 | assert_raise AvroEx.Schema.DecodeError, message_2, fn -> 511 | Parser.parse!(%{ 512 | "type" => "fixed", 513 | "name" => "string_size", 514 | "size" => "40" 515 | }) 516 | end 517 | end 518 | 519 | test "must have a valid name" do 520 | message = "Invalid name `1bad` for `name` in %{\"name\" => \"1bad\", \"size\" => 2, \"type\" => \"fixed\"}" 521 | 522 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 523 | Parser.parse!(%{ 524 | "type" => "fixed", 525 | "name" => "1bad", 526 | "size" 
=> 2 527 | }) 528 | end 529 | end 530 | 531 | test "must have a valid namespace" do 532 | message_1 = 533 | "Invalid name `namespace..` for `namespace` in %{\"name\" => \"bad_namespace\", \"namespace\" => \"namespace..\", \"size\" => 2, \"type\" => \"fixed\"}" 534 | 535 | assert_raise AvroEx.Schema.DecodeError, message_1, fn -> 536 | Parser.parse!(%{ 537 | "type" => "fixed", 538 | "name" => "bad_namespace", 539 | "namespace" => "namespace..", 540 | "size" => 2 541 | }) 542 | end 543 | # A namespace with a single trailing dot must be rejected as well. 544 | message_2 = 545 | "Invalid name `namespace.` for `namespace` in %{\"name\" => \"bad_namespace\", \"namespace\" => \"namespace.\", \"size\" => 2, \"type\" => \"fixed\"}" 546 | 547 | assert_raise AvroEx.Schema.DecodeError, message_2, fn -> 548 | Parser.parse!(%{ 549 | "type" => "fixed", 550 | "name" => "bad_namespace", 551 | "namespace" => "namespace.", 552 | "size" => 2 553 | }) 554 | end 555 | end 556 | end 557 | 558 | describe "maps" do 559 | test "can parse simple maps" do 560 | assert %Schema{schema: schema, context: context} = 561 | Parser.parse!(%{ 562 | "type" => "map", 563 | "values" => "string", 564 | "default" => %{"a" => "b"} 565 | }) 566 | 567 | assert schema == %AvroMap{ 568 | values: %Primitive{type: :string}, 569 | default: %{"a" => "b"} 570 | } 571 | 572 | assert context == %Context{} 573 | end 574 | 575 | test "default must be encodeable" do 576 | message_1 = "Invalid default in Map Schema Mismatch: Expected value of string, got 1" 577 | 578 | assert_raise AvroEx.Schema.DecodeError, message_1, fn -> 579 | Parser.parse!(%{ 580 | "type" => "map", 581 | "values" => "string", 582 | "default" => %{"a" => 1} 583 | }) 584 | end 585 | 586 | message_2 = "Invalid default in Map Schema Mismatch: Expected value of Map, got []" 587 | 588 | assert_raise AvroEx.Schema.DecodeError, message_2, fn -> 589 | Parser.parse!(%{ 590 | "type" => "map", 591 | "values" => "string", 592 | "default" => [] 593 | }) 594 | end 595 | end 596 | 597 | test "values must be a valid type" do 598 | message = 
"Found undeclared reference `nope`. Known references are empty" 599 | 600 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 601 | Parser.parse!(%{ 602 | "type" => "map", 603 | "values" => "nope" 604 | }) 605 | end 606 | end 607 | end 608 | 609 | describe "name references" do 610 | test "types can be referred to by an previously defined type" do 611 | assert %Schema{schema: schema, context: context} = 612 | Parser.parse!(%{ 613 | "type" => "record", 614 | "name" => "pets", 615 | "fields" => [ 616 | %{ 617 | "name" => "favorite_pet", 618 | "type" => %{ 619 | "type" => "record", 620 | "name" => "Pet", 621 | "fields" => [ 622 | %{ 623 | "name" => "type", 624 | "type" => %{"type" => "enum", "name" => "PetType", "symbols" => ["cat", "dog"]} 625 | }, 626 | %{"name" => "name", "type" => "string"} 627 | ] 628 | } 629 | }, 630 | %{"name" => "first_pet", "type" => "Pet"} 631 | ] 632 | }) 633 | 634 | assert %Record{ 635 | name: "pets", 636 | fields: [ 637 | %Record.Field{ 638 | name: "favorite_pet", 639 | type: %Record{ 640 | name: "Pet", 641 | fields: [ 642 | %Record.Field{ 643 | name: "type", 644 | type: %AvroEnum{name: "PetType", symbols: ["cat", "dog"]} 645 | }, 646 | %Record.Field{name: "name", type: %Primitive{type: :string}} 647 | ] 648 | } 649 | }, 650 | %Record.Field{name: "first_pet", type: %Reference{type: "Pet"}} 651 | ] 652 | } = schema 653 | 654 | assert Map.keys(context.names) == ["Pet", "PetType", "pets"] 655 | end 656 | 657 | test "types can be referred by an alias" do 658 | assert %Schema{schema: schema, context: context} = 659 | Parser.parse!(%{ 660 | "type" => "record", 661 | "name" => "top", 662 | "fields" => [ 663 | %{ 664 | "name" => "one", 665 | "type" => %{"type" => "enum", "symbols" => ["x"], "name" => "a", "aliases" => ["b", "c"]} 666 | }, 667 | %{"name" => "two", "type" => "a"}, 668 | %{"name" => "three", "type" => "b"}, 669 | %{"name" => "four", "type" => "c"} 670 | ] 671 | }) 672 | 673 | assert schema == %Record{ 674 | fields: [ 675 | 
%AvroEx.Schema.Record.Field{ 676 | name: "one", 677 | type: %AvroEx.Schema.Enum{ 678 | aliases: ["b", "c"], 679 | name: "a", 680 | symbols: ["x"] 681 | } 682 | }, 683 | %AvroEx.Schema.Record.Field{ 684 | name: "two", 685 | type: %AvroEx.Schema.Reference{type: "a"} 686 | }, 687 | %AvroEx.Schema.Record.Field{ 688 | name: "three", 689 | type: %AvroEx.Schema.Reference{type: "b"} 690 | }, 691 | %AvroEx.Schema.Record.Field{ 692 | name: "four", 693 | type: %AvroEx.Schema.Reference{type: "c"} 694 | } 695 | ], 696 | name: "top" 697 | } 698 | 699 | assert Map.keys(context.names) == ["a", "b", "c", "top"] 700 | end 701 | 702 | test "can create recursive types" do 703 | assert %Schema{schema: schema} = 704 | Parser.parse!(%{ 705 | "type" => "record", 706 | "name" => "recursive", 707 | "fields" => [ 708 | %{"name" => "nested", "type" => ["null", "recursive"]} 709 | ] 710 | }) 711 | 712 | assert schema == %Record{ 713 | name: "recursive", 714 | fields: [ 715 | %Record.Field{ 716 | name: "nested", 717 | type: %Union{possibilities: [%Primitive{type: :null}, %Reference{type: "recursive"}]} 718 | } 719 | ] 720 | } 721 | end 722 | 723 | test "aliases must be valid" do 724 | message_1 = 725 | "Invalid name `` for `aliases` in %{\"aliases\" => \"\", \"fields\" => [%{\"name\" => \"one\", \"type\" => \"string\"}], \"name\" => \"invalid_aliases\", \"type\" => \"record\"}" 726 | 727 | assert_raise AvroEx.Schema.DecodeError, message_1, fn -> 728 | Parser.parse!(%{ 729 | "type" => "record", 730 | "name" => "invalid_aliases", 731 | "aliases" => "", 732 | "fields" => [%{"name" => "one", "type" => "string"}] 733 | }) 734 | end 735 | 736 | message_2 = 737 | "Invalid name `bad name` for `aliases` in %{\"aliases\" => [\"bad name\"], \"fields\" => [%{\"name\" => \"one\", \"type\" => \"string\"}], \"name\" => \"invalid_aliases\", \"type\" => \"record\"}" 738 | 739 | assert_raise AvroEx.Schema.DecodeError, message_2, fn -> 740 | Parser.parse!(%{ 741 | "type" => "record", 742 | "name" => 
"invalid_aliases", 743 | "aliases" => ["bad name"], 744 | "fields" => [%{"name" => "one", "type" => "string"}] 745 | }) 746 | end 747 | end 748 | 749 | test "must refer to types previously defined" do 750 | message = "Found undeclared reference `callback`. Known references are `invalid_ref`" 751 | 752 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 753 | Parser.parse!(%{ 754 | "type" => "record", 755 | "name" => "invalid_ref", 756 | "fields" => [ 757 | %{"name" => "one", "type" => "callback"}, 758 | %{"name" => "two", "type" => %{"name" => "callback", "type" => "fixed", "size" => 2}} 759 | ] 760 | }) 761 | end 762 | end 763 | 764 | test "namespaces are inherited" do 765 | assert %Schema{schema: schema} = 766 | Parser.parse!(%{ 767 | "type" => "record", 768 | "name" => "inferred_reference", 769 | "namespace" => "beam.community", 770 | "fields" => [ 771 | %{"name" => "one", "type" => %{"name" => "callback", "type" => "fixed", "size" => 2}}, 772 | %{"name" => "two", "type" => "callback"} 773 | ] 774 | }) 775 | 776 | assert schema == %Record{ 777 | name: "inferred_reference", 778 | namespace: "beam.community", 779 | fields: [ 780 | %Record.Field{name: "one", type: %Fixed{name: "callback", size: 2}}, 781 | %Record.Field{name: "two", type: %Reference{type: "beam.community.callback"}} 782 | ] 783 | } 784 | 785 | assert %Schema{schema: schema} = 786 | Parser.parse!(%{ 787 | "type" => "record", 788 | "name" => "qualified_reference", 789 | "namespace" => "beam.community", 790 | "fields" => [ 791 | %{"name" => "one", "type" => %{"name" => "callback", "type" => "fixed", "size" => 2}}, 792 | %{"name" => "two", "type" => "beam.community.callback"} 793 | ] 794 | }) 795 | 796 | assert schema == %Record{ 797 | name: "qualified_reference", 798 | namespace: "beam.community", 799 | fields: [ 800 | %Record.Field{name: "one", type: %Fixed{name: "callback", size: 2}}, 801 | %Record.Field{name: "two", type: %Reference{type: "beam.community.callback"}} 802 | ] 803 | } 804 | 805 | 
assert %Schema{schema: schema} = 806 | Parser.parse!(%{ 807 | "type" => "record", 808 | "name" => "aliased_reference", 809 | "namespace" => "beam.community", 810 | "fields" => [ 811 | %{ 812 | "name" => "one", 813 | "type" => %{"name" => "callback", "aliases" => ["alias"], "type" => "fixed", "size" => 2} 814 | }, 815 | %{"name" => "two", "type" => "beam.community.alias"} 816 | ] 817 | }) 818 | 819 | assert schema == %Record{ 820 | name: "aliased_reference", 821 | namespace: "beam.community", 822 | fields: [ 823 | %Record.Field{name: "one", type: %Fixed{name: "callback", size: 2, aliases: ["alias"]}}, 824 | %Record.Field{name: "two", type: %Reference{type: "beam.community.alias"}} 825 | ] 826 | } 827 | 828 | assert %Schema{schema: schema} = 829 | Parser.parse!(%{ 830 | "type" => "record", 831 | "name" => "beam.community.from_name", 832 | "namespace" => "ignore", 833 | "fields" => [ 834 | %{ 835 | "name" => "one", 836 | "type" => %{"name" => "callback", "aliases" => ["alias"], "type" => "fixed", "size" => 2} 837 | }, 838 | %{"name" => "two", "type" => "beam.community.alias"} 839 | ] 840 | }) 841 | 842 | assert schema == %Record{ 843 | name: "beam.community.from_name", 844 | namespace: "ignore", 845 | fields: [ 846 | %Record.Field{name: "one", type: %Fixed{name: "callback", size: 2, aliases: ["alias"]}}, 847 | %Record.Field{name: "two", type: %Reference{type: "beam.community.alias"}} 848 | ] 849 | } 850 | end 851 | end 852 | 853 | describe "strict parsing" do 854 | test "logicalType on a field will raise" do 855 | message = 856 | "Unrecognized schema key `logicalType` for AvroEx.Schema.Record.Field in %{\"logicalType\" => \"timestamp-millis\", \"name\" => \"timestamp\", \"type\" => \"long\"}" 857 | 858 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 859 | Parser.parse!( 860 | %{ 861 | "type" => "record", 862 | "name" => "analytics", 863 | "fields" => [ 864 | %{"name" => "timestamp", "type" => "long", "logicalType" => "timestamp-millis"} 865 | ] 866 | }, 867 | 
strict: true 868 | ) 869 | end 870 | end 871 | 872 | test "extra fields on enum will raise" do 873 | message = 874 | "Unrecognized schema key `extra` for AvroEx.Schema.Enum in %{\"extra\" => \"value\", \"name\" => \"extra_enum\", \"symbols\" => [\"one\", \"two\"], \"type\" => \"enum\"}" 875 | 876 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 877 | Parser.parse!( 878 | %{ 879 | "type" => "enum", 880 | "name" => "extra_enum", 881 | "symbols" => ["one", "two"], 882 | "extra" => "value" 883 | }, 884 | strict: true 885 | ) 886 | end 887 | end 888 | 889 | test "extra fields on fixed will raise" do 890 | message = 891 | "Unrecognized schema key `extra` for AvroEx.Schema.Fixed in %{\"extra\" => \"value\", \"name\" => \"double\", \"size\" => 2, \"type\" => \"fixed\"}" 892 | 893 | assert_raise AvroEx.Schema.DecodeError, message, fn -> 894 | Parser.parse!( 895 | %{ 896 | "type" => "fixed", 897 | "size" => 2, 898 | "name" => "double", 899 | "extra" => "value" 900 | }, 901 | strict: true 902 | ) 903 | end 904 | end 905 | end 906 | end 907 | -------------------------------------------------------------------------------- /test/schema_test.exs: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Test do 2 | use ExUnit.Case, async: true 3 | 4 | require __MODULE__.Macros 5 | import __MODULE__.Macros 6 | 7 | alias AvroEx.Schema 8 | alias AvroEx.Schema.Enum, as: AvroEnum 9 | alias AvroEx.Schema.Map, as: AvroMap 10 | alias AvroEx.Schema.Record.Field 11 | alias AvroEx.Schema.{Array, Context, Fixed, Primitive, Record, Reference, Union} 12 | 13 | doctest AvroEx.Schema, import: true 14 | 15 | @test_module AvroEx.Schema 16 | 17 | @spec json_add_property(binary | map, atom | binary, any) :: map | binary 18 | def json_add_property(str, property, value) when is_binary(str) do 19 | str 20 | |> Jason.decode!() 21 | |> json_add_property(property, value) 22 | |> Jason.encode!() 23 | end 24 | 25 | def json_add_property(json, 
property, value) when is_map(json) and is_atom(property) do 26 | json_add_property(json, Atom.to_string(property), value) 27 | end 28 | 29 | def json_add_property(json, property, value) when is_map(json) and is_binary(property) do 30 | Map.update(json, property, value, fn _ -> value end) 31 | end 32 | 33 | @json ~S""" 34 | { 35 | "type": "record", 36 | "name": "MyRecord", 37 | "namespace": "me.cjpoll.avro_ex", 38 | "aliases": ["OldRecord", "SomeRecord"], 39 | "doc": "A record for testing", 40 | "fields": [ 41 | { 42 | "type": "long", 43 | "name": "field3", 44 | "doc": "some field", 45 | "aliases": ["field1", "field2"] 46 | }, 47 | { 48 | "type": { 49 | "type": "record", 50 | "name": "ChildRecord", 51 | "aliases": ["InnerRecord"], 52 | "fields": [] 53 | }, 54 | "name": "field6", 55 | "doc": "some field", 56 | "aliases": ["field4", "field5"] 57 | } 58 | ] 59 | } 60 | """ 61 | 62 | describe "parse record" do 63 | @schema AvroEx.Schema.Record 64 | 65 | test "works" do 66 | child_record = %@schema{ 67 | name: "ChildRecord", 68 | aliases: ["InnerRecord"], 69 | fields: [] 70 | } 71 | 72 | parent = %@schema{ 73 | aliases: ["OldRecord", "SomeRecord"], 74 | doc: "A record for testing", 75 | name: "MyRecord", 76 | namespace: "me.cjpoll.avro_ex", 77 | fields: [ 78 | %Field{ 79 | type: %Primitive{ 80 | type: :long, 81 | metadata: %{} 82 | }, 83 | name: "field3", 84 | doc: "some field", 85 | aliases: ["field1", "field2"] 86 | }, 87 | %Field{ 88 | type: child_record, 89 | name: "field6", 90 | doc: "some field", 91 | aliases: ["field4", "field5"] 92 | } 93 | ] 94 | } 95 | 96 | context = %Context{ 97 | names: %{ 98 | "me.cjpoll.avro_ex.OldRecord" => parent, 99 | "me.cjpoll.avro_ex.SomeRecord" => parent, 100 | "me.cjpoll.avro_ex.MyRecord" => parent, 101 | "me.cjpoll.avro_ex.ChildRecord" => child_record, 102 | "me.cjpoll.avro_ex.InnerRecord" => child_record 103 | } 104 | } 105 | 106 | {:ok, %@test_module{} = schema} = AvroEx.decode_schema(@json) 107 | 108 | assert parent == 
schema.schema 109 | assert context == schema.context 110 | end 111 | 112 | handles_metadata() 113 | end 114 | 115 | describe "parse union" do 116 | test "primitives" do 117 | assert {:ok, 118 | %AvroEx.Schema{ 119 | schema: %Union{ 120 | possibilities: [ 121 | %Primitive{type: :null}, 122 | %Primitive{type: :int} 123 | ] 124 | } 125 | }} = AvroEx.decode_schema(~S(["null", "int"])) 126 | end 127 | 128 | test "record in union" do 129 | child_record = %Record{ 130 | name: "ChildRecord", 131 | aliases: ["InnerRecord"] 132 | } 133 | 134 | parent = %Record{ 135 | aliases: ["OldRecord", "SomeRecord"], 136 | doc: "A record for testing", 137 | name: "MyRecord", 138 | namespace: "me.cjpoll.avro_ex", 139 | fields: [ 140 | %Field{ 141 | type: %Primitive{ 142 | type: :long, 143 | metadata: %{} 144 | }, 145 | name: "field3", 146 | doc: "some field", 147 | aliases: ["field1", "field2"] 148 | }, 149 | %Field{ 150 | type: child_record, 151 | name: "field6", 152 | doc: "some field", 153 | aliases: ["field4", "field5"] 154 | } 155 | ] 156 | } 157 | 158 | context = %Context{ 159 | names: %{ 160 | "me.cjpoll.avro_ex.OldRecord" => parent, 161 | "me.cjpoll.avro_ex.SomeRecord" => parent, 162 | "me.cjpoll.avro_ex.MyRecord" => parent, 163 | "me.cjpoll.avro_ex.ChildRecord" => child_record, 164 | "me.cjpoll.avro_ex.InnerRecord" => child_record 165 | } 166 | } 167 | 168 | {:ok, %AvroEx.Schema{} = schema} = AvroEx.decode_schema(~s(["null", #{@json}])) 169 | 170 | assert ^context = schema.context 171 | 172 | assert %Union{ 173 | possibilities: [ 174 | %Primitive{type: :null}, 175 | ^parent 176 | ] 177 | } = schema.schema 178 | end 179 | 180 | test "union in record" do 181 | schema = ~S""" 182 | {"type": "record", "name": "arecord", "fields": [ 183 | {"type": ["null", "int"], "name": "a"} 184 | ]} 185 | """ 186 | 187 | assert {:ok, 188 | %AvroEx.Schema{ 189 | schema: %Record{ 190 | fields: [ 191 | %Field{ 192 | name: "a", 193 | type: %Union{ 194 | possibilities: [ 195 | %Primitive{type: :null}, 
196 | %Primitive{type: :int} 197 | ] 198 | } 199 | } 200 | ] 201 | } 202 | }} = AvroEx.decode_schema(schema) 203 | end 204 | end 205 | 206 | describe "parse map" do 207 | @json ~S({"type": "map", "values": "int"}) 208 | @schema AvroMap 209 | 210 | handles_metadata() 211 | 212 | test "doesn't blow up" do 213 | assert {:ok, 214 | %@test_module{ 215 | schema: %@schema{} 216 | }} = AvroEx.decode_schema(@json) 217 | end 218 | 219 | test "matches the given type" do 220 | assert {:ok, %@test_module{schema: %@schema{values: %Primitive{type: :int}}}} = AvroEx.decode_schema(@json) 221 | end 222 | 223 | test "works with a union" do 224 | assert {:ok, 225 | %@test_module{ 226 | schema: %@schema{ 227 | values: %Union{ 228 | possibilities: [ 229 | %Primitive{type: :null}, 230 | %Primitive{type: :int} 231 | ] 232 | } 233 | } 234 | }} = 235 | @json 236 | |> json_add_property(:values, ["null", "int"]) 237 | |> AvroEx.decode_schema() 238 | end 239 | end 240 | 241 | describe "parse array" do 242 | @json ~S({"type": "array", "items": "int"}) 243 | @schema Array 244 | test "doesn't blow up" do 245 | assert {:ok, %@test_module{schema: %@schema{}}} = AvroEx.decode_schema(@json) 246 | end 247 | 248 | handles_metadata() 249 | end 250 | 251 | describe "encodable? 
(primitive)" do 252 | @values %{ 253 | "null" => nil, 254 | "boolean" => false, 255 | "int" => 1, 256 | "long" => 1, 257 | "float" => 1.0, 258 | "double" => 1.0, 259 | "bytes" => "12345", 260 | "string" => "12345" 261 | } 262 | 263 | for a <- @values, 264 | b <- @values do 265 | test "#{inspect(a)} vs #{inspect(b)}" do 266 | {ka, va} = unquote(a) 267 | {_kb, vb} = unquote(b) 268 | {:ok, schema} = AvroEx.decode_schema(~s(#{inspect(ka)})) 269 | 270 | assert @test_module.encodable?(schema, va) 271 | assert @test_module.encodable?(schema, vb) == (va === vb) 272 | end 273 | end 274 | 275 | test "accepts atoms as strings" do 276 | {:ok, schema} = AvroEx.decode_schema(~S("string")) 277 | assert @test_module.encodable?(schema, :dave) 278 | refute @test_module.encodable?(schema, nil) 279 | end 280 | 281 | test "does not accept non-utf8 strings as string" do 282 | {:ok, schema} = AvroEx.decode_schema(~S("string")) 283 | refute @test_module.encodable?(schema, <<128>>) 284 | end 285 | 286 | test "does accept non-utf8 binaries as bytes" do 287 | {:ok, schema} = AvroEx.decode_schema(~S("bytes")) 288 | assert @test_module.encodable?(schema, <<128>>) 289 | end 290 | 291 | test "does not accept non-binary bitstrings as string" do 292 | {:ok, schema} = AvroEx.decode_schema(~S("string")) 293 | refute @test_module.encodable?(schema, <<0::7>>) 294 | end 295 | 296 | test "does not accept non-binary bitstrings as bytes" do 297 | {:ok, schema} = AvroEx.decode_schema(~S("bytes")) 298 | refute @test_module.encodable?(schema, <<0::7>>) 299 | end 300 | end 301 | 302 | describe "parse (enum)" do 303 | test "doesn't blow up" do 304 | assert {:ok, _enum_schema} = 305 | AvroEx.decode_schema( 306 | ~S({"type": "enum", "name": "Suit", "symbols": ["heart", "spade", "diamond", "club"]}) 307 | ) 308 | end 309 | 310 | test "returns an Enum struct" do 311 | assert {:ok, %Schema{schema: %AvroEnum{}}} = 312 | AvroEx.decode_schema( 313 | ~S({"type": "enum", "name": "Suit", "symbols": ["heart", "spade", 
"diamond", "club"]}) 314 | ) 315 | end 316 | 317 | test "fails if the symbols aren't all strings" do 318 | assert {:ok, %Schema{schema: %AvroEnum{}}} = 319 | AvroEx.decode_schema( 320 | ~S({"type": "enum", "name": "Suit", "symbols": ["heart", "spade", "diamond", "club"]}) 321 | ) 322 | end 323 | end 324 | 325 | describe "encodable? (record: non-nested)" do 326 | setup do 327 | schema = """ 328 | { 329 | "type": "record", 330 | "name": "Person", 331 | "fields": [ 332 | {"name": "first_name", "type": "string"}, 333 | {"name": "age", "type": "int"} 334 | ] 335 | } 336 | """ 337 | 338 | {:ok, parsed_schema} = AvroEx.decode_schema(schema) 339 | {:ok, %{schema: parsed_schema}} 340 | end 341 | 342 | test "can be encoded with a map", %{schema: schema} do 343 | assert @test_module.encodable?(schema, %{"first_name" => "Cody", "age" => 30}) 344 | end 345 | 346 | test "can not be encoded with a proplist", %{schema: schema} do 347 | refute @test_module.encodable?(schema, [{"first_name", "Cody"}, {"age", 30}]) 348 | end 349 | 350 | test "checks that the value typings match", %{schema: schema} do 351 | refute @test_module.encodable?(schema, %{"first_name" => "Cody", "age" => "Cody"}) 352 | refute @test_module.encodable?(schema, %{"first_name" => 30, "age" => 30}) 353 | refute @test_module.encodable?(schema, %{"first_name" => "Cody", "ages" => 30}) 354 | end 355 | 356 | test "records can have keys as atoms", %{schema: schema} do 357 | assert @test_module.encodable?(schema, %{first_name: "Dave", age: 32}) 358 | end 359 | end 360 | 361 | describe "encodable? 
(record: nested)" do 362 | setup do 363 | schema = """ 364 | { 365 | "type": "record", 366 | "name": "Person", 367 | "fields": [ 368 | {"name": "first_name", "type": "string"}, 369 | {"name": "age", "type": "int"}, 370 | { 371 | "name": "thing", 372 | "type":{ 373 | "type": "record", 374 | "name": "Thing", 375 | "fields": [ 376 | {"name": "some_field", "type": "null"} 377 | ] 378 | } 379 | } 380 | ] 381 | } 382 | """ 383 | 384 | {:ok, parsed_schema} = AvroEx.decode_schema(schema) 385 | {:ok, %{schema: parsed_schema}} 386 | end 387 | 388 | test "works as expected", %{schema: schema} do 389 | data = %{"first_name" => "Cody", "age" => 30, "thing" => %{"some_field" => nil}} 390 | 391 | assert @test_module.encodable?(schema, data) 392 | refute @test_module.encodable?(schema, %{"first_name" => "Cody", "age" => 30}) 393 | end 394 | 395 | test "checks typing on child records", %{schema: schema} do 396 | data = %{"first_name" => "Cody", "age" => 30, "thing" => %{"some_field" => 1}} 397 | refute @test_module.encodable?(schema, data) 398 | end 399 | end 400 | 401 | describe "encodable? (record: named)" do 402 | setup do 403 | schema = """ 404 | { 405 | "type": "record", 406 | "name": "LinkedList", 407 | "fields": [ 408 | {"name": "value", "type": "int"}, 409 | {"name": "next", "type": ["null", "LinkedList"]} 410 | ] 411 | } 412 | """ 413 | 414 | {:ok, parsed_schema} = AvroEx.decode_schema(schema) 415 | {:ok, %{schema: parsed_schema}} 416 | end 417 | 418 | test "works with a named type", %{schema: schema} do 419 | assert @test_module.encodable?(schema, %{ 420 | "value" => 1, 421 | "next" => %{"value" => 2, "next" => nil} 422 | }) 423 | end 424 | end 425 | 426 | describe "encodable? 
(union)" do 427 | test "works as expected" do 428 | {:ok, schema} = AvroEx.decode_schema(~S(["null", "string", "int"])) 429 | 430 | assert @test_module.encodable?(schema, nil) 431 | assert @test_module.encodable?(schema, "hello") 432 | assert @test_module.encodable?(schema, 25) 433 | 434 | refute @test_module.encodable?(schema, 25.1) 435 | refute @test_module.encodable?(schema, true) 436 | refute @test_module.encodable?(schema, %{"Hello" => "world"}) 437 | end 438 | 439 | test "works with logical types" do 440 | {:ok, schema} = AvroEx.decode_schema(~S(["null", {"type": "long", "logicalType":"timestamp-millis"}])) 441 | 442 | assert @test_module.encodable?(schema, nil) 443 | assert @test_module.encodable?(schema, DateTime.utc_now()) 444 | assert @test_module.encodable?(schema, 1_525_658_987) 445 | 446 | refute @test_module.encodable?(schema, 1.5) 447 | refute @test_module.encodable?(schema, "AvroEx") 448 | refute @test_module.encodable?(schema, Time.utc_now()) 449 | 450 | assert schema = AvroEx.decode_schema!(%{"type" => "int", "logicalType" => "date"}) 451 | assert @test_module.encodable?(schema, Date.utc_today()) 452 | end 453 | end 454 | 455 | describe "encodable? 
(map)" do 456 | test "works as expected" do 457 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": "int"})) 458 | assert @test_module.encodable?(schema, %{"value" => 1, "value2" => 2, "value3" => 3}) 459 | end 460 | 461 | test "fails if key is not a string" do 462 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": "int"})) 463 | refute @test_module.encodable?(schema, %{1 => 1}) 464 | end 465 | 466 | test "fails if value does not match type" do 467 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": "int"})) 468 | refute @test_module.encodable?(schema, %{"value" => 1.1}) 469 | end 470 | 471 | test "fails if one value does not match type" do 472 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": "int"})) 473 | refute @test_module.encodable?(schema, %{"value" => 11, "value2" => 12, "value3" => 1.1}) 474 | end 475 | 476 | test "works with a union" do 477 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": ["null", "int"]})) 478 | assert @test_module.encodable?(schema, %{"value" => 1, "value2" => 2, "value3" => nil}) 479 | refute @test_module.encodable?(schema, %{"value" => 1, "value2" => 2.1, "value3" => nil}) 480 | end 481 | 482 | test "works with an empty map" do 483 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": "int"})) 484 | assert @test_module.encodable?(schema, %{}) 485 | end 486 | 487 | test "maps can have atom keys" do 488 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "map", "values": "int"})) 489 | assert @test_module.encodable?(schema, %{a: 1, b: 2}) 490 | end 491 | end 492 | 493 | describe "encodable? 
(array)" do 494 | test "works as expected" do 495 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "array", "items": "int"})) 496 | assert @test_module.encodable?(schema, [1, 2, 3, 4, 5]) 497 | end 498 | 499 | test "fails if item does not match type" do 500 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "array", "items": "int"})) 501 | refute @test_module.encodable?(schema, [1.1]) 502 | end 503 | 504 | test "fails if one item does not match type" do 505 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "array", "items": "int"})) 506 | refute @test_module.encodable?(schema, [1, 2, 3, 4.5, 6]) 507 | end 508 | 509 | test "works with a union" do 510 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "array", "items": ["null", "int"]})) 511 | assert @test_module.encodable?(schema, [1, 2, nil, 3, 4, nil, 5]) 512 | assert @test_module.encodable?(schema, [nil, 2, nil, 3, 4, nil, 5]) 513 | refute @test_module.encodable?(schema, [1, 2.1, nil]) 514 | end 515 | 516 | test "works with an empty array" do 517 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "array", "items": "int"})) 518 | assert @test_module.encodable?(schema, []) 519 | end 520 | end 521 | 522 | describe "encodable? (fixed)" do 523 | test "works as expected" do 524 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "fixed", "name": "SHA", "size": 40})) 525 | assert @test_module.encodable?(schema, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") 526 | end 527 | 528 | test "fails if size is too small" do 529 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "fixed", "size": 40, "name": "SHA"})) 530 | refute @test_module.encodable?(schema, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") 531 | end 532 | 533 | test "fails if size is too large" do 534 | {:ok, schema} = AvroEx.decode_schema(~S({"type": "fixed", "size": 40, "name": "SHA"})) 535 | refute @test_module.encodable?(schema, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") 536 | end 537 | end 538 | 539 | describe "encodable? 
(enum)" do 540 | test "works as expected" do 541 | {:ok, schema} = 542 | AvroEx.decode_schema(~S({"type": "enum", "name": "Suit", "symbols": ["heart", "spade", "diamond", "club"]})) 543 | 544 | assert @test_module.encodable?(schema, "heart") 545 | end 546 | 547 | test "fails if string is not in symbols" do 548 | {:ok, schema} = 549 | AvroEx.decode_schema(~S({"type": "enum", "name": "Suit", "symbols": ["heart", "spade", "diamond", "club"]})) 550 | 551 | refute @test_module.encodable?(schema, "kkjasdfkasdfj") 552 | end 553 | 554 | test "enums can have atoms" do 555 | {:ok, schema} = 556 | AvroEx.decode_schema(~S({"type": "enum", "name": "Suit", "symbols": ["heart", "spade", "diamond", "club"]})) 557 | 558 | assert @test_module.encodable?(schema, :heart) 559 | end 560 | end 561 | end 562 | -------------------------------------------------------------------------------- /test/support/encode_macros.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Encode.Test.Macros do 2 | defmacro assert_result(m, f, a, result) do 3 | quote do 4 | test "#{unquote(m)}.#{unquote(f)} - #{unquote(:erlang.unique_integer())}" do 5 | assert apply(unquote(m), unquote(f), unquote(a)) == unquote(result) 6 | end 7 | end 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /test/support/schema_macros.ex: -------------------------------------------------------------------------------- 1 | defmodule AvroEx.Schema.Test.Macros do 2 | defmacro handles_metadata do 3 | quote do 4 | test "has a default empty metadata" do 5 | assert {:ok, %@test_module{schema: %@schema{metadata: %{}}}} = AvroEx.decode_schema(@json) 6 | end 7 | 8 | test "includes extra metadata if given" do 9 | assert {:ok, %@test_module{schema: %@schema{metadata: %{"meta_prop" => "abc"}}}} = 10 | @json 11 | |> json_add_property("meta_prop", "abc") 12 | |> AvroEx.decode_schema() 13 | end 14 | end 15 | end 16 | 17 | defmacro 
parse_primitive(passed_in_type, primitive_type) do 18 | quote do 19 | test "simple #{unquote(passed_in_type)}" do 20 | assert {:ok, 21 | %@test_module{ 22 | schema: %AvroEx.Schema.Primitive{type: unquote(primitive_type)}, 23 | context: %AvroEx.Schema.Context{names: %{}} 24 | }} = AvroEx.decode_schema(~s("#{unquote(passed_in_type)}")) 25 | end 26 | 27 | test "complex #{unquote(passed_in_type)}" do 28 | assert {:ok, 29 | %@test_module{ 30 | schema: %AvroEx.Schema.Primitive{type: unquote(primitive_type)}, 31 | context: %AvroEx.Schema.Context{names: %{}} 32 | }} = AvroEx.decode_schema(~s({"type": "#{unquote(passed_in_type)}"})) 33 | end 34 | 35 | test "complex #{unquote(passed_in_type)} with metadata" do 36 | assert {:ok, 37 | %@test_module{ 38 | schema: %AvroEx.Schema.Primitive{ 39 | type: unquote(primitive_type), 40 | metadata: %{"some" => "metadata"} 41 | }, 42 | context: %AvroEx.Schema.Context{names: %{}} 43 | }} = AvroEx.decode_schema(~s({"type": "#{unquote(passed_in_type)}", "some": "metadata"})) 44 | end 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | --------------------------------------------------------------------------------