├── .credo.exs ├── .formatter.exs ├── .gitignore ├── .semaphore └── semaphore.yml ├── .tool-versions ├── LICENSE ├── README.md ├── config ├── config.exs └── test.exs ├── docs └── todo.md ├── lib ├── adapters │ ├── csv.ex │ ├── file_manager │ │ └── file_manager.ex │ └── identity.ex ├── data_quacker.ex ├── data_quacker │ ├── adapter.ex │ ├── builder.ex │ ├── context.ex │ ├── matcher.ex │ ├── skipper.ex │ ├── sourcer.ex │ ├── transformer.ex │ └── validator.ex └── schema │ ├── error.ex │ ├── helpers │ ├── fun_wrapper.ex │ └── wrapped_fun.ex │ ├── schema.ex │ └── state.ex ├── mix.exs ├── mix.lock ├── priv └── plts │ └── .gitignore └── test ├── adapters ├── csv_test.exs └── identity_test.exs ├── data_quacker └── skipper_test.exs ├── examples ├── pond_example_test.exs ├── pricing_example_test.exs └── students_example_test.exs ├── helpers_tests └── fun_wrapper_test.exs ├── schema └── state_test.exs ├── support ├── case.ex └── mock_file_manager.ex └── test_helper.exs /.credo.exs: -------------------------------------------------------------------------------- 1 | %{ 2 | configs: [ 3 | %{ 4 | name: "default", 5 | files: %{ 6 | included: [ 7 | "lib/", 8 | "test/", 9 | "priv/" 10 | ], 11 | excluded: [~r"/_build/", ~r"/deps/", ~r"/node_modules/", ~r"/priv/repo/migrations/"] 12 | }, 13 | plugins: [], 14 | requires: [], 15 | strict: true, 16 | parse_timeout: 5000, 17 | color: true, 18 | checks: [ 19 | {Credo.Check.Consistency.ExceptionNames, []}, 20 | {Credo.Check.Consistency.LineEndings, []}, 21 | {Credo.Check.Consistency.ParameterPatternMatching, []}, 22 | {Credo.Check.Consistency.SpaceAroundOperators, []}, 23 | {Credo.Check.Consistency.SpaceInParentheses, []}, 24 | {Credo.Check.Consistency.TabsOrSpaces, []}, 25 | {Credo.Check.Design.AliasUsage, 26 | [priority: :low, if_nested_deeper_than: 3, if_called_more_often_than: 1]}, 27 | {Credo.Check.Design.TagTODO, false}, 28 | {Credo.Check.Design.TagFIXME, []}, 29 | {Credo.Check.Readability.AliasOrder, []}, 30 | 
{Credo.Check.Readability.FunctionNames, []}, 31 | {Credo.Check.Readability.LargeNumbers, []}, 32 | {Credo.Check.Readability.MaxLineLength, [priority: :low, max_length: 120]}, 33 | {Credo.Check.Readability.ModuleAttributeNames, []}, 34 | {Credo.Check.Readability.ModuleDoc, false}, 35 | {Credo.Check.Readability.ModuleNames, []}, 36 | {Credo.Check.Readability.ParenthesesInCondition, []}, 37 | {Credo.Check.Readability.ParenthesesOnZeroArityDefs, false}, 38 | {Credo.Check.Readability.PredicateFunctionNames, []}, 39 | {Credo.Check.Readability.PreferImplicitTry, []}, 40 | {Credo.Check.Readability.RedundantBlankLines, []}, 41 | {Credo.Check.Readability.Semicolons, []}, 42 | {Credo.Check.Readability.SpaceAfterCommas, []}, 43 | {Credo.Check.Readability.StringSigils, []}, 44 | {Credo.Check.Readability.TrailingBlankLine, []}, 45 | {Credo.Check.Readability.TrailingWhiteSpace, []}, 46 | {Credo.Check.Readability.UnnecessaryAliasExpansion, []}, 47 | {Credo.Check.Readability.VariableNames, []}, 48 | {Credo.Check.Refactor.CondStatements, []}, 49 | {Credo.Check.Refactor.CyclomaticComplexity, false}, 50 | {Credo.Check.Refactor.FunctionArity, []}, 51 | {Credo.Check.Refactor.LongQuoteBlocks, []}, 52 | {Credo.Check.Refactor.MatchInCondition, []}, 53 | {Credo.Check.Refactor.NegatedConditionsInUnless, []}, 54 | {Credo.Check.Refactor.NegatedConditionsWithElse, []}, 55 | {Credo.Check.Refactor.Nesting, []}, 56 | {Credo.Check.Refactor.UnlessWithElse, []}, 57 | {Credo.Check.Refactor.WithClauses, []}, 58 | {Credo.Check.Warning.ApplicationConfigInModuleAttribute, []}, 59 | {Credo.Check.Warning.BoolOperationOnSameValues, []}, 60 | {Credo.Check.Warning.ExpensiveEmptyEnumCheck, []}, 61 | {Credo.Check.Warning.IExPry, []}, 62 | {Credo.Check.Warning.IoInspect, []}, 63 | {Credo.Check.Warning.MixEnv, false}, 64 | {Credo.Check.Warning.OperationOnSameValues, []}, 65 | {Credo.Check.Warning.OperationWithConstantResult, []}, 66 | {Credo.Check.Warning.RaiseInsideRescue, []}, 67 | 
{Credo.Check.Warning.UnusedEnumOperation, []}, 68 | {Credo.Check.Warning.UnusedFileOperation, []}, 69 | {Credo.Check.Warning.UnusedKeywordOperation, []}, 70 | {Credo.Check.Warning.UnusedListOperation, []}, 71 | {Credo.Check.Warning.UnusedPathOperation, []}, 72 | {Credo.Check.Warning.UnusedRegexOperation, []}, 73 | {Credo.Check.Warning.UnusedStringOperation, []}, 74 | {Credo.Check.Warning.UnusedTupleOperation, []}, 75 | {Credo.Check.Warning.UnsafeExec, []}, 76 | {Credo.Check.Consistency.MultiAliasImportRequireUse, []}, 77 | {Credo.Check.Consistency.UnusedVariableNames, []}, 78 | {Credo.Check.Design.DuplicatedCode, false}, 79 | {Credo.Check.Readability.AliasAs, []}, 80 | {Credo.Check.Readability.BlockPipe, false}, 81 | {Credo.Check.Readability.ImplTrue, []}, 82 | {Credo.Check.Readability.MultiAlias, []}, 83 | {Credo.Check.Readability.SeparateAliasRequire, false}, 84 | {Credo.Check.Readability.SinglePipe, []}, 85 | {Credo.Check.Readability.Specs, false}, 86 | {Credo.Check.Readability.StrictModuleLayout, false}, 87 | {Credo.Check.Readability.WithCustomTaggedTuple, false}, 88 | {Credo.Check.Refactor.ABCSize, false}, 89 | {Credo.Check.Refactor.AppendSingleItem, []}, 90 | {Credo.Check.Refactor.DoubleBooleanNegation, []}, 91 | {Credo.Check.Refactor.ModuleDependencies, []}, 92 | {Credo.Check.Refactor.NegatedIsNil, false}, 93 | {Credo.Check.Refactor.PipeChainStart, []}, 94 | {Credo.Check.Refactor.VariableRebinding, false}, 95 | {Credo.Check.Warning.LeakyEnvironment, false}, 96 | {Credo.Check.Warning.MapGetUnsafePass, false}, 97 | {Credo.Check.Warning.UnsafeToAtom, []} 98 | ] 99 | } 100 | ] 101 | } 102 | -------------------------------------------------------------------------------- /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- 
/.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | data_quacker-*.tar 24 | 25 | -------------------------------------------------------------------------------- /.semaphore/semaphore.yml: -------------------------------------------------------------------------------- 1 | version: "v1.0" 2 | name: Tests and checks 3 | agent: 4 | machine: 5 | type: e1-standard-2 6 | os_image: ubuntu1804 7 | containers: 8 | - name: main 9 | image: semaphoreci/elixir:1.12.0 10 | 11 | blocks: 12 | - name: Install dependencies 13 | task: 14 | jobs: 15 | - name: Install dependencies 16 | commands: 17 | - checkout 18 | - mix local.hex --force 19 | - mix local.rebar --force 20 | - cache restore 21 | - cache restore dialyzer-plt 22 | - mix do deps.get, compile, dialyzer --plt 23 | - MIX_ENV=test mix compile 24 | - cache store 25 | - cache store dialyzer-plt priv/plts/ 26 | - name: Run checks 27 | task: 28 | prologue: 29 | commands: 30 | - checkout 31 | - mix local.hex --force 32 | - mix local.rebar --force 33 | - cache restore 34 | - cache restore dialyzer-plt 35 | jobs: 36 | - name: Run formatter 37 | commands: 38 | - mix format --check-formatted 39 | - name: Run credo 40 | commands: 41 | - mix credo --strict 42 | - name: Run dialyzer 43 | commands: 44 | - mix dialyzer 45 | - cache store 46 | - 
name: Run tests 47 | task: 48 | env_vars: 49 | - name: MIX_ENV 50 | value: test 51 | prologue: 52 | commands: 53 | - checkout 54 | - mix local.hex --force 55 | - mix local.rebar --force 56 | - cache restore 57 | jobs: 58 | - name: Run tests 59 | commands: 60 | - mix test 61 | -------------------------------------------------------------------------------- /.tool-versions: -------------------------------------------------------------------------------- 1 | elixir 1.12.2-otp-24 2 | erlang 24.0.5 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataQuacker 2 | 3 | DataQuacker is a library which aims to help validate, transform, and parse non-sandboxed data, like CSV files. 4 | 5 | It features a simple DSL similar to that of Ecto, which allows the user to declaratively describe the rules for mapping columns in the source into a desired structure. It also makes it easy to specify rules for validating, transforming and skipping specific fields and rows. 6 | 7 | The documentation along with usage examples can be found at [hexdocs.pm](https://hexdocs.pm/data_quacker/DataQuacker.html) 8 | 9 | To see the next steps for this library take a look at: [todo.md](./docs/todo.md) 10 | 11 | ## Installation 12 | 13 | To install the library, add it to your `mix.exs` deps. 14 | 15 | ```elixir 16 | def deps do 17 | [ 18 | {:data_quacker, "~> 0.1.1"} 19 | ] 20 | end 21 | ``` 22 | 23 | ## Contribution 24 | 25 | Any contribution is greatly appreciated. 
If you find anything working incorrectly or missing in this library or its documentation, please open an issue or a pull request. 26 | 27 | Issues inquiring about usage and best practices are also welcome. 28 | 29 | ## Testimonials 30 | 31 | "..." ~ the rubber duck on my desk 32 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | use Mix.Config 2 | 3 | if Mix.env() == :test do 4 | import_config("test.exs") 5 | end 6 | -------------------------------------------------------------------------------- /config/test.exs: -------------------------------------------------------------------------------- 1 | use Mix.Config 2 | 3 | config :data_quacker, :file_manager, DataQuacker.MockFileManager 4 | -------------------------------------------------------------------------------- /docs/todo.md: -------------------------------------------------------------------------------- 1 | # TODOs 2 | - [x] Rename `parse` to `transform` 3 | - [x] Throw a meaningful exception if a transformer or validator returns an unexpected data type 4 | - [x] Basic documentation 5 | - [x] Generalise parsing, allow adapters 6 | - [x] Guard against fields or schemas with non-atom names 7 | - [x] Guard against two fields of identical name in the same parent, two schemas of identical name within the same module 8 | - [x] Example tests 9 | - [ ] Tests for Schema 10 | - [ ] Tests for DataQuacker (core) 11 | - [ ] Tests for Adapters 12 | - [ ] Typespecs, Dialyzer 13 | - [ ] Full documentation 14 | -------------------------------------------------------------------------------- /lib/adapters/csv.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Adapters.CSV do 2 | @moduledoc ~S""" 3 | This is a CSV adapter which can parse CSV files 4 | from the local filesystem or fetched over http. 
5 | 6 | It is the default used if no adapter is specified. 7 | 8 | ## Example source 9 | 10 | - Local file path: `"path/to/csv/file.csv"` 11 | - Remote file url: `"https://remote_file.com/file/abc"` 12 | """ 13 | 14 | @behaviour DataQuacker.Adapter 15 | 16 | @impl DataQuacker.Adapter 17 | @doc ~S""" 18 | Takes in a string with the path or url to the file, and a keyword list of options. 19 | 20 | ## Options 21 | - `:separator` - the ASCII value of the column separator in the CSV file; usually retrieved with the `?*` notation where "*" is the character, for example: `?,` for a comma, `?;` for a semicolon, etc. 22 | - `:local?` - a boolean flag representing whether the file is present on the local file system or on a remote server 23 | """ 24 | def parse_source(file_path_or_url, opts) do 25 | case get_file(file_path_or_url, opts) do 26 | {:ok, raw_data} -> decode_source(raw_data, get_separator(opts)) 27 | error -> error 28 | end 29 | end 30 | 31 | defp get_file(file_path_or_url, opts) do 32 | case Keyword.get(opts, :local?, true) do 33 | true -> {:ok, file_manager().stream!(file_path_or_url)} 34 | false -> {:ok, file_manager().read_link!(file_path_or_url)} 35 | end 36 | rescue 37 | _error -> {:error, "File does not exist or is corrupted"} 38 | end 39 | 40 | defp decode_source(source_stream, separator) do 41 | source_stream 42 | |> CSV.decode(separator: separator) 43 | |> Enum.into([]) 44 | |> case do 45 | [headers | rows] -> {:ok, %{headers: headers, rows: rows}} 46 | error -> error 47 | end 48 | end 49 | 50 | defp get_separator(opts) do 51 | Keyword.get(opts, :separator, ?,) 52 | end 53 | 54 | @impl DataQuacker.Adapter 55 | def get_headers(%{headers: headers}), do: headers 56 | 57 | @impl DataQuacker.Adapter 58 | def get_rows(%{rows: rows}), do: {:ok, rows} 59 | 60 | @impl DataQuacker.Adapter 61 | def get_row(row), do: row 62 | 63 | defp file_manager() do 64 | Application.get_env(:data_quacker, :file_manager) || DataQuacker.FileManager 65 | end 66 | end 67 | 
-------------------------------------------------------------------------------- /lib/adapters/file_manager/file_manager.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.FileManager do 2 | @moduledoc false 3 | 4 | @callback stream!(Path.t()) :: Enumerable.t() | File.Stream.t() | {:error, String.t()} 5 | @callback read_link!(Path.t()) :: {:ok, binary()} | {:error, String.t()} 6 | 7 | defdelegate stream!(path), to: File 8 | defdelegate read_link!(url), to: File 9 | end 10 | -------------------------------------------------------------------------------- /lib/adapters/identity.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Adapters.Identity do 2 | @moduledoc ~S""" 3 | This is an "identity adapter". 4 | It takes in a map with `:headers` and `:rows` as the keys. 5 | 6 | This adapter is very useful for testing a particular schema, 7 | but can also be used as the actual adapter if needed. 8 | 9 | ## Example source 10 | 11 | ```elixir 12 | %{ 13 | headers: ["First name", "Last name", "Age"], 14 | rows: [ 15 | ["John", "Smith", "21"], 16 | # ... 17 | ] 18 | } 19 | ``` 20 | """ 21 | 22 | @behaviour DataQuacker.Adapter 23 | 24 | @impl DataQuacker.Adapter 25 | @doc ~S""" 26 | Takes in a map with `:headers` and `:rows` keys, where the value under `:headers` is a list of strings, and the value under `:rows` is a list of lists of anything. 27 | 28 | > Note: Each list in the rows list must be of the same length as the headers list. 
29 | """ 30 | def parse_source(source, _opts) do 31 | {:ok, source} 32 | end 33 | 34 | @impl DataQuacker.Adapter 35 | def get_headers(%{headers: headers}), do: {:ok, headers} 36 | 37 | @impl DataQuacker.Adapter 38 | def get_rows(%{rows: rows}), do: {:ok, rows} 39 | 40 | @impl DataQuacker.Adapter 41 | def get_row(row), do: {:ok, row} 42 | end 43 | -------------------------------------------------------------------------------- /lib/data_quacker.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker do 2 | @moduledoc """ 3 | DataQuacker is a library which aims to help validate, transform, and parse non-sandboxed data. 4 | 5 | The most common example for such data, and the original idea behind this project, is CSV files. 6 | The scope of this library is not, however, in any way limited to CSV files. 7 | This library ships by default with two adapters: `DataQuacker.Adapters.CSV` for CSV files, 8 | and `DataQuacker.Adapters.Identity` for "in-memory data". 9 | Any other data source may be used with the help of third-party adapters; see: `DataQuacker.Adapter`. 10 | 11 | This library is comprised of three main components: 12 | 13 | - `DataQuacker`, which provides the `parse/4` function to parse data using a schema 14 | - `DataQuacker.Schema`, which provides a DSL for declaratively defining schemas which describe the mapping between the source data and the desired output 15 | - `DataQuacker.Adapters.CSV` and `DataQuacker.Adapters.Identity`, which extract data from sources into a format required by the `parse/4` function 16 | 17 | > Note: If you find anything missing from or unclear in the documentation, please do not hesitate to open an issue on the project's [Github repository](https://github.com/fiodorbaczynski/data_quacker). 18 | 19 | ## Testing 20 | 21 | The tests for parsing data which is external or non-sandboxed are often difficult to implement well, 22 | since that data may need to change over time. 
23 | For example, editing CSV files used for tests, when the requirements change, 24 | can be tedious. 25 | 26 | For this reason, using a different adapter, which takes Elixir data as the input, for tests is recommended. 27 | In integration tests for this library the `DataQuacker.Adapters.Identity` adapter is used. 28 | 29 | The easiest way to switch out adapters in tests is to put the desired adapter in the `test.exs` config. 30 | You can find out how to do this under the "Options" section in the documentation for the `parse/4` function. 31 | 32 | ## Examples 33 | 34 | > Note: Most of the "juice", like transforming, validating, nesting, skipping, etc., is in the `DataQuacker.Schema` module, so the more complex and interesting examples also live there. Please take a look at its documentation for more in-depth examples. 35 | 36 | > Note: A fully working implementation of these examples can be found in the tests inside the "examples" directory. 37 | 38 | Given the following table of ducks in a pond, in the form of a CSV file: 39 | 40 | | Type | Colour | Age | 41 | |:--------:|:--------------:|-----| 42 | | Mallard | green | 3 | 43 | | Domestic | white | 2 | 44 | | Mandarin | multi-coloured | 4 | 45 | 46 | we want to have a list of maps with `:type`, `:colour` and `:age` as the keys. 
47 | 48 | This can be achieved by creating the following schema and parser modules: 49 | 50 | Schema 51 | 52 | ```elixir 53 | defmodule PondSchema do 54 | use DataQuacker.Schema 55 | 56 | schema :pond do 57 | field :type do 58 | source("type") 59 | end 60 | 61 | field :colour do 62 | # make the "u" optional 63 | # in case we get an American data source :) 64 | 65 | source(~r/colou?r/i) 66 | end 67 | 68 | field :age do 69 | source("age") 70 | end 71 | end 72 | end 73 | ``` 74 | 75 | Parser 76 | 77 | ``` 78 | defmodule PondParser do 79 | def parse(file_path) do 80 | DataQuacker.parse( 81 | file_path, 82 | PondSchema.schema_structure(:pond), 83 | nil 84 | ) 85 | end 86 | end 87 | ``` 88 | 89 | ```elixir 90 | iex> PondParser.parse("path/to/file.csv") 91 | iex> {:ok, [ 92 | iex> {:ok, %{type: "Mandarin", colour: "multi-coloured", age: "4"}}, 93 | iex> {:ok, %{type: "Domestic", colour: "white", age: "2"}}, 94 | iex> {:ok, %{type: "Mallard", colour: "green", age: "3"}}, 95 | iex> ]} 96 | ``` 97 | 98 | Using this schema and parser we get a tuple of `:ok` or `:error`, and a list of rows, 99 | each of which is also a tuple of `:ok` or `:error`, but with a map as the second element. 100 | The topmost `:ok` or `:error` indicates whether *all* rows are valid, 101 | and those for individual rows indicate whether that particular row is valid 102 | 103 | > Note: The rows in the result are in the reverse order compared to the source rows. This is because for large lists reversing may be an expensive operation, which is often redundant, for example if the result is supposed to be inserted in a database. 104 | 105 | Now suppose we also want to validate that the type is one in a list of types we know, 106 | and get the age in the form of an integer. 
107 | We need to make some changes to our schema 108 | 109 | ```elixir 110 | defmodule PondSchema do 111 | use DataQuacker.Schema 112 | 113 | schema :pond do 114 | field :type do 115 | validate(fn type -> type in ["Mallard", "Domestic", "Mandarin"] end) 116 | 117 | source("type") 118 | end 119 | 120 | field :colour do 121 | # make the "u" optional 122 | # in case we get an American data source :) 123 | 124 | source(~r/colou?r/i) 125 | end 126 | 127 | field :age do 128 | transform(fn age_str -> 129 | case Integer.parse(age_str) do 130 | {age_int, _} -> {:ok, age_int} 131 | :error -> :error 132 | end 133 | end) 134 | 135 | source("age") 136 | end 137 | end 138 | end 139 | ``` 140 | 141 | Using the same input file the output is now: 142 | 143 | ```elixir 144 | iex> PondParser.parse("path/to/file.csv") 145 | iex> {:ok, [ 146 | iex> {:ok, %{type: "Mandarin", colour: "multi-coloured", age: 4}}, 147 | iex> {:ok, %{type: "Domestic", colour: "white", age: 2}}, 148 | iex> {:ok, %{type: "Mallard", colour: "green", age: 3}}, 149 | iex> ]} 150 | ``` 151 | 152 | (the difference is in the type of "age") 153 | 154 | If we add some invalid fields to the file, however, the result will be quite different: 155 | 156 | | Type | Colour | Age | 157 | |:--------:|:--------------:|----------| 158 | | Mallard | green | 3 | 159 | | Domestic | white | 2 | 160 | | Mandarin | multi-coloured | 4 | 161 | | Mystery | golden | 100 | 162 | | Black | black | Infinity | 163 | 164 | ```elixir 165 | iex> PondParser.parse("path/to/file.csv") 166 | iex> {:error, [ 167 | iex> :error, 168 | iex> :error, 169 | iex> {:ok, %{type: "Mandarin", colour: "multi-coloured", age: 4}}, 170 | iex> {:ok, %{type: "Domestic", colour: "white", age: 2}}, 171 | iex> {:ok, %{type: "Mallard", colour: "green", age: 3}}, 172 | iex> ]} 173 | ``` 174 | 175 | Since the last two rows of the input are invalid, the first two rows in the output are errors. 
176 | 177 | > Note: The errors can be made more descriptive by returning tuples `{:error, any()}` from the validators and parsers. You can see this in action in the examples for the `DataQuacker.Schema` module. 178 | """ 179 | 180 | alias DataQuacker.Builder 181 | 182 | @doc """ 183 | Takes in a source, a schema, support data, and a keyword list of options. 184 | Returns a tuple with `:ok` or `:error` (indicating whether all rows are valid) as the first element, 185 | and a list of tuples `{:ok, map()} | {:error, any()} | :error)`. 186 | In case of `{:ok, map()}` for a given row, the map is the output defined in the schema. 187 | 188 | ## Source 189 | 190 | Any data which will be given to the adapter so that it can retrieve the source data. 191 | In case of the `DataQuacker.Adapter.CSV` this can be a file path or a file url. 192 | 193 | ## Schema 194 | 195 | A schema formed with the DSL from `DataQuacker.Schema`. 196 | 197 | ## Support data 198 | 199 | Any data which is supposed to be accessible inside various schema elements when parsing a source. 200 | 201 | ## Options 202 | 203 | The options can also be specified in the config, for example: 204 | 205 | ```elixir 206 | use Mix.Config 207 | 208 | # ... 209 | 210 | config :data_quacker, 211 | adapter: DataQuacker.Adapters.Identity, 212 | adapter_opts: [] 213 | 214 | # ... 
215 | ``` 216 | 217 | - `:adapter` - the adapter module to be used to retrieve the source data; defaults to `DataQuacker.Adapters.CSV` 218 | - `:adapter_opts` - a keyword list of opts to be passed to the adapter; defaults to `[separator: ?,, local?: true]`; for a list of available adapter options see the documentation for the particular adapter 219 | """ 220 | @spec parse(any(), map(), any(), Keyword.t()) :: 221 | {:ok, list({:ok, map()} | {:error, any()} | :error)} 222 | | {:error, list({:ok, map()} | {:error, any()} | :error)} 223 | def parse(source, schema, support_data, opts \\ []) do 224 | with opts <- apply_default_opts(opts), 225 | adapter <- get_adapter(opts), 226 | {:ok, source} <- adapter.parse_source(source, get_adapter_opts(opts)) do 227 | Builder.call(source, schema, support_data, adapter) 228 | end 229 | end 230 | 231 | defp apply_default_opts(opts) do 232 | default_opts() 233 | |> Keyword.merge(Application.get_all_env(:data_quacker)) 234 | |> Keyword.merge(opts) 235 | end 236 | 237 | defp default_opts do 238 | [ 239 | adapter: DataQuacker.Adapters.CSV, 240 | adapter_opts: [separator: ?,, local?: true] 241 | ] 242 | end 243 | 244 | defp get_adapter(opts) do 245 | Keyword.get(opts, :adapter) 246 | end 247 | 248 | defp get_adapter_opts(opts) do 249 | Keyword.get(opts, :adapter_opts, []) 250 | end 251 | end 252 | -------------------------------------------------------------------------------- /lib/data_quacker/adapter.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Adapter do 2 | @moduledoc ~S""" 3 | Specifies the behaviour to which adapters must conform. 4 | 5 | An adapter must implement these functions: `parse_source/2`, `get_headers/1`, `get_rows/1`, `get_row/1`. 6 | 7 | The first one takes a source (e.g. a file path) and a keyword list of options, 8 | and returns a tuple of `{:ok, any()}` or `{:error, any()}`. 
9 | In case of success the second element of the tuple 10 | will be the value given to the other two function. 11 | 12 | The second one takes the result of `parse_source/2` 13 | and returns `{:ok, list(any())} | {:error, any()}`. 14 | In case of success the second element of the tuple 15 | will be the value used to determine the indexes 16 | of sources described in the schema. 17 | 18 | The third one takes the result of `parse_source/2` 19 | and returns `{:ok, list(any())} | {:error, any()}`. 20 | In case of success each subsequent element of the resulting list 21 | will be passed to the get row function. 22 | 23 | The last one takes an element of the list 24 | which is the result of `get_rows/1` 25 | and returns `{:ok, list(any())} | {:error, any()}`. 26 | In case of success the resulting list will be treated 27 | as the list of columns in a row of the source. 28 | 29 | > Note: The resulting list in the `get_row/1` function must be of the same length as the resulting list in the `get_headers/1` function. 30 | 31 | For an example implementation take a look at the built-in adapters. 32 | 33 | > The rationale behind this API for adapters is that, depending on the source, potential errors may occur at different stages of parsing the source. For example the CSV library included in the default CSV adapter returns a tuple with `:ok` or `:error`as the first element for each row. However, some external APIs, like Google Sheets, return a list of rows without specifying for each whether it's valid or not. Therefore we need for it to be possible to specify that for each row, but not required for an adapter to eagerly iterate over all of the rows and wrap them in a tuple with `:ok`. 
34 | """ 35 | 36 | @callback parse_source(any(), Keyword.t()) :: {:ok, any()} | {:error, any()} 37 | @callback get_headers(any()) :: {:ok, list(any())} | {:error, any()} 38 | @callback get_rows(any()) :: {:ok, list(any())} | {:error, any()} 39 | @callback get_row(any()) :: {:ok, list(any())} | {:error, any()} 40 | end 41 | -------------------------------------------------------------------------------- /lib/data_quacker/builder.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Builder do 2 | @moduledoc false 3 | 4 | alias DataQuacker.Context 5 | alias DataQuacker.Matcher 6 | alias DataQuacker.Skipper 7 | alias DataQuacker.Sourcer 8 | alias DataQuacker.Transformer 9 | alias DataQuacker.Validator 10 | 11 | def call( 12 | source, 13 | %{__name__: schema_name, matchers: matchers, rows: schema_rows} = _schema, 14 | support_data, 15 | adapter 16 | ) do 17 | with {:ok, headers} <- adapter.get_headers(source), 18 | {:ok, source_rows} <- adapter.get_rows(source), 19 | context <- 20 | support_data 21 | |> Context.new() 22 | |> Context.update_metadata(:schema, schema_name), 23 | {:ok, column_mappings} <- Matcher.call(headers, matchers, context) do 24 | build_source_rows(source_rows, schema_rows, column_mappings, context, adapter) 25 | end 26 | end 27 | 28 | defp build_source_rows( 29 | _source_rows, 30 | _schema_rows, 31 | _column_mappings, 32 | _context, 33 | _adapter, 34 | _acc \\ [], 35 | _all_ok? \\ true 36 | ) 37 | 38 | defp build_source_rows( 39 | [source_row | rest], 40 | schema_rows, 41 | column_mappings, 42 | context, 43 | adapter, 44 | acc, 45 | all_ok? 46 | ) do 47 | context = Context.increment_row(context) 48 | source_row = adapter.get_row(source_row) 49 | 50 | {result, context} = do_build_source_row(source_row, schema_rows, column_mappings, context) 51 | 52 | build_source_rows( 53 | rest, 54 | schema_rows, 55 | column_mappings, 56 | context, 57 | adapter, 58 | result ++ acc, 59 | all_ok? 
and 60 | Enum.all?(result, fn 61 | {:ok, _res} -> true 62 | _el -> false 63 | end) 64 | ) 65 | end 66 | 67 | defp build_source_rows([], _schema_rows, _column_mappings, _context, _adapter, acc, true), 68 | do: {:ok, acc} 69 | 70 | defp build_source_rows([], _schema_rows, _column_mappings, _context, _adapter, acc, false), 71 | do: {:error, acc} 72 | 73 | defp do_build_source_row({:ok, source_row}, schema_rows, column_mappings, context) do 74 | values = parse_row_values(source_row, column_mappings) 75 | 76 | build_schema_rows(schema_rows, values, context) 77 | end 78 | 79 | defp do_build_source_row(error, _schema_rows, _column_mappings, _context), do: error 80 | 81 | defp build_schema_rows(_schema_rows, _values, _context, acc \\ []) 82 | 83 | defp build_schema_rows([row | rest], values, context, acc) do 84 | case do_build_schema_row(row, values, context) do 85 | :skip -> build_schema_rows(rest, values, context, acc) 86 | {:ok, fields, context} -> build_schema_rows(rest, values, context, [{:ok, fields} | acc]) 87 | error -> build_schema_rows(rest, values, context, [error | acc]) 88 | end 89 | end 90 | 91 | defp build_schema_rows([], _values, context, acc), do: {acc, context} 92 | 93 | defp do_build_schema_row( 94 | %{ 95 | __index__: row_index, 96 | fields: fields, 97 | validators: validators, 98 | transformers: transformers, 99 | skip_if: skip_if 100 | }, 101 | values, 102 | context 103 | ) do 104 | with context <- Context.update_metadata(context, :row, row_index), 105 | {:ok, fields, context} <- fields |> Enum.into([]) |> build_fields(values, context), 106 | {:ok, fields, context} <- Transformer.call(fields, transformers, context), 107 | :ok <- Validator.call(fields, validators, context), 108 | false <- Skipper.call(fields, skip_if, context) do 109 | {:ok, fields, context} 110 | else 111 | true -> :skip 112 | error -> error 113 | end 114 | end 115 | 116 | defp build_fields(_fields, _values, _context, _acc \\ %{}) 117 | 118 | defp build_fields([{field_name, field} | 
fields], values, context, acc) do 119 | case do_build_field(field, values, context) do 120 | :skip -> 121 | build_fields(fields, values, context, acc) 122 | 123 | {:ok, field, context} -> 124 | build_fields(fields, values, context, Map.put(acc, field_name, field)) 125 | 126 | error -> 127 | error 128 | end 129 | end 130 | 131 | defp build_fields([], _values, context, acc), do: {:ok, acc, context} 132 | 133 | defp do_build_field( 134 | %{ 135 | __name__: field_name, 136 | validators: validators, 137 | transformers: transformers, 138 | skip_if: skip_if 139 | } = field, 140 | values, 141 | context 142 | ) do 143 | with context <- Context.update_metadata(context, :field, field_name), 144 | {:ok, value, context} <- do_build_field_value(field, values, context), 145 | {:ok, value, context} <- Transformer.call(value, transformers, context), 146 | :ok <- Validator.call(value, validators, context), 147 | false <- Skipper.call(value, skip_if, context) do 148 | {:ok, value, context} 149 | else 150 | true -> :skip 151 | error -> error 152 | end 153 | end 154 | 155 | defp do_build_field_value(%{__type__: :sourced, source: source}, values, context) do 156 | {:ok, Sourcer.call(source, values, context), context} 157 | end 158 | 159 | defp do_build_field_value(%{__type__: :wrapper, subfields: subfields}, values, context) do 160 | subfields 161 | |> Enum.into([]) 162 | |> build_fields(values, context) 163 | end 164 | 165 | defp parse_row_values(row, column_mappings) do 166 | column_mappings 167 | |> Enum.map(fn {target, index} -> {target, Enum.at(row, index)} end) 168 | |> Enum.into(%{}) 169 | end 170 | end 171 | -------------------------------------------------------------------------------- /lib/data_quacker/context.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Context do 2 | @moduledoc ~S""" 3 | This module provides a struct 4 | to hold contextual data 5 | for CSV parsing 6 | 7 | ## Metadata 8 | 9 | Metadata is a 
tuple of an atom and another atom or a non-negative integer. The first is the type of the entity currently being processed (`:field`, `:row`, etc.). The second is the name or index of the entity (name in case of a field, index in case of row). 10 | 11 | ## Support data 12 | 13 | Support data can be of any Elixir data type. It is the exact value passed as support_data to the `DataQuacker.parse/4` at runtime. 14 | 15 | ## Source row 16 | 17 | Source row is a non-negative integer. The value is the index of the source row currently being processed. 18 | """ 19 | 20 | @type t :: %__MODULE__{ 21 | metadata: {atom(), atom() | non_neg_integer()}, 22 | support_data: any(), 23 | source_row: non_neg_integer() 24 | } 25 | defstruct [:metadata, :support_data, source_row: 0] 26 | 27 | @doc false 28 | def new(support_data), do: %__MODULE__{support_data: support_data} 29 | 30 | @doc false 31 | def update_metadata(context, type, name_or_index) do 32 | %__MODULE__{context | metadata: {type, name_or_index}} 33 | end 34 | 35 | @doc false 36 | def increment_row(%__MODULE__{source_row: source_row} = context) do 37 | %__MODULE__{context | source_row: source_row + 1} 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /lib/data_quacker/matcher.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Matcher do 2 | @moduledoc false 3 | 4 | alias DataQuacker.Schema.WrappedFun 5 | 6 | def call(headers, rules, context), do: compile_rules(rules, headers, context) 7 | 8 | defp compile_rules(_rules, _headers, _context, acc \\ []) 9 | 10 | defp compile_rules( 11 | [%{rule: matching_function, target: target} | rest], 12 | headers, 13 | context, 14 | acc 15 | ) do 16 | case get_header_index(headers, matching_function, context) do 17 | nil -> 18 | {:error, {:header_not_found, target}} 19 | 20 | index -> 21 | compile_rules( 22 | rest, 23 | headers, 24 | context, 25 | [{target, index} | acc] 26 
| ) 27 | end 28 | end 29 | 30 | defp compile_rules([], _headers, _context, acc), do: {:ok, acc} 31 | 32 | defp get_header_index(headers, matching_function, context) do 33 | Enum.find_index(headers, &apply_function(matching_function, &1, context)) 34 | end 35 | 36 | defp apply_function(%WrappedFun{arity: 1, callable: callable}, column, _context) do 37 | callable.(column) 38 | end 39 | 40 | defp apply_function(%WrappedFun{arity: 2, callable: callable}, column, context) do 41 | callable.(column, context) 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /lib/data_quacker/skipper.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Skipper do 2 | @moduledoc false 3 | 4 | alias DataQuacker.Context 5 | 6 | alias DataQuacker.Schema.WrappedFun 7 | 8 | @type skipper_result :: true | false 9 | 10 | @spec call(any(), nil, any()) :: false 11 | def call(_value, nil, _context), do: false 12 | 13 | @spec call(any(), WrappedFun.t(), Context.t()) :: skipper_result() 14 | def call(value, skipping_rule, context) do 15 | case apply_function(skipping_rule, value, context) do 16 | result when is_boolean(result) -> 17 | result 18 | 19 | el -> 20 | raise """ 21 | 22 | Skipper in #{elem(context.metadata, 0)} #{elem(context.metadata, 1)} 23 | returned an incorrect value #{inspect(el)}. 
24 | 25 | Skippers can only have returns of type: 26 | `true | false` 27 | """ 28 | end 29 | end 30 | 31 | @spec apply_function(any(), WrappedFun.t(1), Context.t()) :: any() 32 | defp apply_function(%WrappedFun{arity: 1, callable: callable}, value, _context) do 33 | callable.(value) 34 | end 35 | 36 | @spec apply_function(any(), WrappedFun.t(2), Context.t()) :: any() 37 | defp apply_function(%WrappedFun{arity: 2, callable: callable}, value, context) do 38 | callable.(value, context) 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /lib/data_quacker/sourcer.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Sourcer do 2 | @moduledoc false 3 | 4 | alias DataQuacker.Schema.WrappedFun 5 | 6 | def call(%WrappedFun{} = getter_function, _values, context) do 7 | apply_function(getter_function, context) 8 | end 9 | 10 | def call(target, values, _context) do 11 | get_value(target, values) 12 | end 13 | 14 | defp apply_function(%WrappedFun{arity: 0, callable: callable}, _context) do 15 | callable.() 16 | end 17 | 18 | defp apply_function(%WrappedFun{arity: 1, callable: callable}, context) do 19 | callable.(context) 20 | end 21 | 22 | defp get_value(target, values) do 23 | Map.get(values, target) 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /lib/data_quacker/transformer.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Transformer do 2 | @moduledoc false 3 | 4 | alias DataQuacker.Context 5 | 6 | alias DataQuacker.Schema.WrappedFun 7 | 8 | @type transformation_result :: {:ok, any()} | {:ok, any(), any()} | {:error, any()} | :error 9 | 10 | @spec call(any(), nonempty_list(WrappedFun.t()), Context.t()) :: transformation_result() 11 | def call(value, [transformer | rest], context) do 12 | case apply_transformer(value, transformer, context) do 13 | 
{:ok, value} -> 14 | call(value, rest, context) 15 | 16 | {:ok, value, support_data} -> 17 | call(value, rest, %{context | support_data: support_data}) 18 | 19 | {:error, _details} = error -> 20 | error 21 | 22 | :error -> 23 | :error 24 | 25 | el -> 26 | raise """ 27 | 28 | Transformer in #{elem(context.metadata, 0)} #{elem(context.metadata, 1)} 29 | returned an incorrect value #{inspect(el)}. 30 | 31 | Transformers can only have returns of type: 32 | `{:ok, any()} | {:ok, any(), any()} | {:error, any()} | :error` 33 | """ 34 | end 35 | end 36 | 37 | @spec call(any(), [], Context.t()) :: {:ok, any(), Context.t()} 38 | def call(value, [], context), do: {:ok, value, context} 39 | 40 | @spec apply_transformer(any(), WrappedFun.t(1), Context.t()) :: any() 41 | defp apply_transformer(value, %WrappedFun{callable: callable, arity: 1}, _context) do 42 | callable.(value) 43 | end 44 | 45 | @spec apply_transformer(any(), WrappedFun.t(2), Context.t()) :: any() 46 | defp apply_transformer(value, %WrappedFun{callable: callable, arity: 2}, context) do 47 | callable.(value, context) 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /lib/data_quacker/validator.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Validator do 2 | @moduledoc false 3 | 4 | alias DataQuacker.Context 5 | 6 | alias DataQuacker.Schema.WrappedFun 7 | 8 | @type validation_result :: :ok | :error | {:error, any()} 9 | 10 | @spec call(any(), nonempty_list(WrappedFun.t()), Context.t()) :: validation_result() 11 | def call(value, [validator | rest], context) do 12 | case apply_validation(value, validator, context) do 13 | :ok -> 14 | call(value, rest, context) 15 | 16 | true -> 17 | call(value, rest, context) 18 | 19 | false -> 20 | :error 21 | 22 | {:error, _details} = error -> 23 | error 24 | 25 | :error -> 26 | :error 27 | 28 | el -> 29 | raise """ 30 | 31 | Validator in #{elem(context.metadata, 
0)} #{elem(context.metadata, 1)} 32 | returned an incorrect value #{inspect(el)}. 33 | 34 | Validators can only have returns of type: 35 | `:ok | :error | {:error, any()} | true | false` 36 | """ 37 | end 38 | end 39 | 40 | @spec call(any(), [], Context.t()) :: :ok 41 | def call(_value, [], _context), do: :ok 42 | 43 | @spec apply_validation(any(), WrappedFun.t(1), Context.t()) :: any() 44 | defp apply_validation(value, %WrappedFun{callable: callable, arity: 1}, _context) do 45 | callable.(value) 46 | end 47 | 48 | @spec apply_validation(any(), WrappedFun.t(2), Context.t()) :: any() 49 | defp apply_validation(value, %WrappedFun{callable: callable, arity: 2}, context) do 50 | callable.(value, context) 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /lib/schema/error.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.SchemaError do 2 | defexception [:message] 3 | end 4 | -------------------------------------------------------------------------------- /lib/schema/helpers/fun_wrapper.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Schema.FunWrapper do 2 | @moduledoc false 3 | 4 | alias DataQuacker.Schema.WrappedFun 5 | alias DataQuacker.SchemaError 6 | 7 | defmacro wrap_fun(fun, expected_arity \\ nil) do 8 | arity = fun_arity(fun) 9 | args = fun_args(arity) 10 | name = random_name() 11 | 12 | maybe_assert_arity!(arity, expected_arity) 13 | 14 | quote do 15 | def unquote(name)(unquote_splicing(args)) do 16 | unquote(fun).(unquote_splicing(args)) 17 | end 18 | 19 | %WrappedFun{callable: &(__MODULE__.unquote(name) / unquote(arity)), arity: unquote(arity)} 20 | end 21 | end 22 | 23 | defp fun_arity(quoted_fun) do 24 | with {fun, _} <- Code.eval_quoted(quoted_fun), 25 | fun_info <- :erlang.fun_info(fun), 26 | arity when not is_nil(arity) <- Keyword.get(fun_info, :arity) do 27 | arity 28 | else 29 | 
_error -> raise SchemaError, "Invalid function given" 30 | end 31 | end 32 | 33 | defp fun_args(0), do: [] 34 | 35 | defp fun_args(arity) do 36 | Enum.map(1..arity, fn i -> 37 | # credo:disable-for-next-line Credo.Check.Warning.UnsafeToAtom 38 | arg_name = String.to_atom("arg#{i}") 39 | 40 | # AST for a variable 41 | {arg_name, [], __MODULE__} 42 | end) 43 | end 44 | 45 | defp maybe_assert_arity!(arity, expected_arity) do 46 | {unquoted_expected_arity, _} = Code.eval_quoted(expected_arity) 47 | 48 | case unquoted_expected_arity do 49 | nil -> 50 | :ok 51 | 52 | %Range{first: first, last: last} when arity >= first and arity <= last -> 53 | :ok 54 | 55 | i when is_integer(i) and arity == i -> 56 | :ok 57 | 58 | el -> 59 | raise SchemaError, """ 60 | 61 | A function of unexpected arity #{arity} given. 62 | Should be #{inspect(el)} 63 | """ 64 | end 65 | end 66 | 67 | defp random_name() do 68 | 64 69 | |> :crypto.strong_rand_bytes() 70 | |> Base.url_encode64() 71 | |> binary_part(0, 64) 72 | |> String.to_atom() 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /lib/schema/helpers/wrapped_fun.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Schema.WrappedFun do 2 | @moduledoc false 3 | 4 | @type t :: %__MODULE__{ 5 | callable: (... -> any()), 6 | arity: non_neg_integer() 7 | } 8 | 9 | @type t(arity) :: %__MODULE__{ 10 | callable: (... -> any()), 11 | arity: arity 12 | } 13 | defstruct [:callable, :arity] 14 | end 15 | -------------------------------------------------------------------------------- /lib/schema/schema.ex: -------------------------------------------------------------------------------- 1 | # credo:disable-for-this-file Credo.Check.Refactor.AppendSingleItem 2 | defmodule DataQuacker.Schema do 3 | @moduledoc ~S""" 4 | Defines macros for creating data schemas 5 | which represents a mapping from the source to the desired output. 
6 | 7 | > Note: To use the macros you have to put `use DataQuacker.Schema` in the desired module. 8 | 9 | A schema can be defined to represent the structure of an arbitrarily nested map or list of maps. 10 | This is done with the `schema/2`, `row/2` and `field/3` macros. 11 | Additionally, there are two special macros: `validate/1` and `transform/1`. 12 | Lastly, the `source/1` and `virtual_source/1` macros are used 13 | to define the data which should be inserted in a particular field. 14 | These allow for validation and transformation to be performed 15 | on a specific subset of the output data. 16 | 17 | > Note: the `row/2` and `field/3` macros represent the *output* structure, 18 | while the `source/1` and `virtual_source/1` macros reference the input data. 19 | Since both the input and the output can be said to have rows, 20 | the term "source row" is used in the documentation to denote a row in the input data. 21 | The term "row" is used to denote a row in the output. 22 | 23 | All of the structure-defining macros take a block as their last argument 24 | which can be thought of as their "body". The `schema/2` and `field/2` macros 25 | also take a name as their first argument, and `row/2` and `field/3` 26 | take a keyword list of options as their first and second argument respectively. 27 | 28 | More information can be found in the documentation for the specific macros. 29 | 30 | ## Examples 31 | 32 | > Note: A fully working implementation of these examples can be found in the tests inside the "examples" directory. 
33 | 34 | Suppose we have a table of students in the form of a CSV file, which looks like this: 35 | 36 | | First name | Last name | Age | Favourite subject | 37 | |:----------:|:---------:|:---:|:-----------------:| 38 | | John | Smith | 19 | Maths | 39 | | Adam | Johnson | 18 | Physics | 40 | | Quackers | the Duck | 1 | Programming | 41 | 42 | Also suppose our desired output is a list of tuples with maps with the following structure: 43 | 44 | ```elixir 45 | {:ok, %{ 46 | first_name: "...", 47 | last_name: "...", 48 | age: "...", 49 | favourite_subject: "..." 50 | }} 51 | ``` 52 | 53 | The mapping from the table to the list of maps can be represented as follows: 54 | 55 | ```elixir 56 | defmodule StudentsSchema do 57 | use DataQuacker.Schema 58 | 59 | schema :students do 60 | field :first_name do 61 | source("first name") 62 | end 63 | 64 | field :last_name do 65 | source("last name") 66 | end 67 | 68 | field :age do 69 | source("age") 70 | end 71 | 72 | field :favourite_subject do 73 | source("favourite subject") 74 | end 75 | end 76 | end 77 | ``` 78 | 79 | This looks great (I hope!), but realistically we would like age to be an Integer, 80 | and favourite subject to be somehow validated. 
This can be achieved by modifying the previous schema, like this: 81 | 82 | ```elixir 83 | defmodule StudentsSchema do 84 | use DataQuacker.Schema 85 | 86 | schema :students do 87 | field :first_name do 88 | source("first name") 89 | end 90 | 91 | field :last_name do 92 | source("last name") 93 | end 94 | 95 | field :age do 96 | transform(fn age -> 97 | case Integer.parse(age) do 98 | {age_int, _} -> {:ok, age_int} 99 | :error -> {:error, "Invalid value #{age} given"} 100 | end 101 | end) 102 | 103 | source("age") 104 | end 105 | 106 | field :favourite_subject do 107 | validate(fn subj -> subj in ["Maths", "Physics", "Programming"] end) 108 | 109 | source("favourite subject") 110 | end 111 | end 112 | end 113 | ``` 114 | 115 | Now our result will be a list of maps, like: 116 | ```elixir 117 | [ 118 | # ... 119 | {:ok, %{ 120 | age: 123, 121 | # ... 122 | }} 123 | # ... 124 | ] 125 | ``` 126 | 127 | > Note: To see how to use such schema to parse a CSV file, please see the example in the documentation for the `DataQuacker` module. 128 | 129 | However if, for example, an invalid age is given, 130 | the entire row where the error occurred will result in the following tuple: 131 | `{:error, "Invalid value blabla given"}` 132 | 133 | Great, but what if we have the "First name" and "Last name" columns in our CSV files, 134 | but only a `:full_name` field in our database? No problem, Fields can be arbitrarily nested. 135 | 136 | It's just a small tweak: 137 | 138 | ```elixir 139 | defmodule StudentsSchema do 140 | use DataQuacker.Schema 141 | 142 | schema :students do 143 | field :full_name do 144 | transform(fn %{first_name: first_name, last_name: last_name} -> 145 | {:ok, "#{first_name} #{last_name}"} 146 | end) 147 | 148 | field :first_name do 149 | source("first name") 150 | end 151 | 152 | field :last_name do 153 | source("last name") 154 | end 155 | end 156 | 157 | # ... 
158 | end 159 | end 160 | ``` 161 | 162 | Now our output is: 163 | 164 | ```elixir 165 | {:ok, [ 166 | #... 167 | {:ok, %{ 168 | full_name: "John Smith", 169 | # ... 170 | }} 171 | #... 172 | ]} 173 | ``` 174 | 175 | To illustrate some more functionality, let's take a look at another example. 176 | We will start with a very simple CSV source file 177 | which will gradually become more and more complex, 178 | and so will our rules for parsing it. 179 | 180 | | Apartment/flat size (in m^2) | Price per 1 month | 181 | |:----------------------------:|:-----------------:| 182 | | 40 | 1000 | 183 | | 50 | 1100 | 184 | 185 | ```elixir 186 | defmodule PricingSchema do 187 | use DataQuacker.Schema 188 | 189 | schema :pricing do 190 | field :size do 191 | transform(fn size -> 192 | case Integer.parse(size) do 193 | {size_int, _} -> {:ok, size_int} 194 | :error -> {:error, "Invalid value #{size} given"} 195 | end 196 | end) 197 | 198 | source("Apartment/flat size (in m^2)") 199 | end 200 | 201 | field :price do 202 | transform(fn price -> 203 | case Integer.parse(price) do 204 | {price_int, _} -> {:ok, price_int} 205 | :error -> {:error, "Invalid value #{price} given"} 206 | end 207 | end) 208 | 209 | source("Price per 1 month") 210 | end 211 | end 212 | end 213 | ``` 214 | 215 | The above results in: 216 | ```elixir 217 | [ 218 | {:ok, %{size: 50, price: 1100}}, 219 | {:ok, %{size: 40, price: 1000}} 220 | ] 221 | ``` 222 | 223 | > Note: The rows in the result are in the reverse order compared to the source rows. This is because for large lists reversing may be an expensive operation, which is often redundant, for example if the result is supposed to be inserted in a database. 224 | 225 | This schema could work, but there are some problems with it. 226 | 227 | It's not fun to copy&paste the function for parsing string to int 228 | over and over again. That's why we'll create a regular function 229 | and pass a reference to it in both places. 
230 | 231 | ```elixir 232 | defmodule PricingSchema do 233 | use DataQuacker.Schema 234 | 235 | schema :pricing do 236 | field :size do 237 | transform(&PricingSchema.parse_int/1) 238 | # ... 239 | end 240 | 241 | field :price do 242 | transform(&PricingSchema.parse_int/1) 243 | # ... 244 | end 245 | end 246 | 247 | def parse_int(str) do 248 | case Integer.parse(str) do 249 | {int, _} -> {:ok, int} 250 | :error -> {:error, "Invalid value #{str} given"} 251 | end 252 | end 253 | end 254 | ``` 255 | 256 | > Note: the reference to the function must be written out in full (including the module name), 257 | because it will be executed in a different context. 258 | 259 | This is better, but still not ideal for two reasons. 260 | First of all, we source our data based on simple string matching. While this will still work 261 | if the casing in the headers changes, it will not if "Price per 1 month" changes to "Price *for* 1 month", 262 | or "Apartment/flat size (in m^2)" to "Apartment *or* flat size (in m^2)". 263 | Since most likely we do not have control over the source, these can change unexpectedly. 264 | Second of all, our error messages are quite vague since they do not specify the offending source row and field. 265 | 266 | To tackle the first one we can change our `source/1` macros to be either strings, regexes, 267 | lists of strings or custom functions. The details of each approach are specified 268 | in the docs for the `source/1` macro, but for now we will just use a list of strings. 269 | 270 | `source("Apartment/flat size (in m^2)")` -> `source(["apartment", "size"])` 271 | 272 | `source("Price per 1 month")` -> `source(["price", "1"])` 273 | 274 | The above mean "match a header which contains apartment and size" 275 | and "match a header which contains price and 1". 276 | 277 | > Note: The order of the headers is inconsequential. 278 | 279 | As for the second issue, transform can actually be given a one- or two-argument function. 
280 | If it is given a one-argument function, the argument at execution will be the value of the field 281 | or row. If it is given a two-argument function, the second argument will be a `%Context{}` struct. 282 | Which contains the following fields: `:metadata`, `:support_data`, `:source_row`. 283 | Support data is an arbitrary value of any type that can be passed in at parse time. 284 | It can be used to, for example, validate something against a database without having to fetch the data 285 | for each row. More on that in the documentation of the `DataQuacker` module. For now, however, we only need `metadata` and `source_row`. The first one is a tuple 286 | of an atom and an atom or a tuple, where the first element is the type (`:field` or `:row`) 287 | and the second one is the name or index in the case of a row. 288 | The second one is just the index of the source row which is being processed. 289 | 290 | > Note: the term "source row" is used here to denote a row in the input file. The term row 291 | is used to denote a row of output. 292 | 293 | We can therefore change our `parse_int/1` function into 294 | 295 | ```elixir 296 | def parse_int(str, %{metadata: metadata, source_row: source_row}) do 297 | case Integer.parse(str) do 298 | {int, _} -> {:ok, int} 299 | :error -> {:error, "Error processing #{elem(metadata, 0)} #{elem(metadata, 1)} in row #{source_row}; '#{str}' given"} 300 | end 301 | end 302 | ``` 303 | 304 | An example error will look like this: `{:error, "Error processing field price in row 2; 'oops' given"}` 305 | 306 | The next case we will be dealing with here is again a "small change" to the source file. 307 | 308 | | Apartment/flat size (in m^2) | Price per 1 month | Price per 3 months | 309 | |:----------------------------:|:-----------------:|--------------------| 310 | | 40 | 1000 | 2800 | 311 | | 50 | 1100 | 3000 | 312 | | 60 | | 3600 | 313 | 314 | Now each source row contains two different prices for different lease period. 
315 | Additionally, for the bigger apartments there may only be an option 316 | to rent for three months. 317 | 318 | We could create a schema to parse the data int rows like: 319 | `%{size: 40, price_1: 1000, price_3: 2800}`, 320 | but this is not ideal since we would have to deal with `nil` at `:price_1`, 321 | and we probably want separate rows in the database for each lease duration, 322 | as this will allow us to easily pull out the price for a specific size and lease duration 323 | using SQL indexes. 324 | 325 | A better structure therefore would look like this 326 | ```elixir 327 | [ 328 | # ... 329 | {:ok, %{size: 40, duration: 3, price: 2800}}, 330 | {:ok, %{size: 40, duration: 1, price: 1000}} 331 | ] 332 | ``` 333 | 334 | This is where the `row/2` macro comes in. It allows us to specify any number of output rows 335 | for a single input row. Previously we did not use this macro at all, 336 | since the lack of it implies there is exactly one output row per input row. 337 | 338 | This is our new schema: 339 | 340 | ```elixir 341 | defmodule PricingSchema do 342 | use DataQuacker.Schema 343 | 344 | schema :pricing do 345 | row skip_if: (fn %{price: price} -> is_nil(price) end) do 346 | field :size do 347 | transform(&PricingSchema.parse_int/2) 348 | 349 | source(["apartment", "size"]) 350 | end 351 | 352 | field :duration do 353 | virtual_source(1) 354 | end 355 | 356 | field :price do 357 | transform(&PricingSchema.parse_int/2) 358 | 359 | source(["price", "1"]) 360 | end 361 | end 362 | 363 | row do 364 | field :size do 365 | transform(&PricingSchema.parse_int/2) 366 | 367 | source(["apartment", "size"]) 368 | end 369 | 370 | field :duration do 371 | virtual_source(3) 372 | end 373 | 374 | field :price do 375 | transform(&PricingSchema.parse_int/2) 376 | 377 | source(["price", "3"]) 378 | end 379 | end 380 | end 381 | 382 | def parse_int("", _), do: {:ok, nil} 383 | 384 | def parse_int(str, %{metadata: metadata, source_row: source_row}) do 385 | case 
Integer.parse(str) do 386 | {int, _} -> {:ok, int} 387 | :error -> {:error, "Error processing #{elem(metadata, 0)} #{elem(metadata, 1)} in row #{source_row}; '#{str}' given"} 388 | end 389 | end 390 | end 391 | ``` 392 | 393 | There are a few new interesting things going on here. 394 | 395 | Firstly, as we can see, any column in the source can be inserted multiple times 396 | within the schema. This is particularly useful if for a single input row 397 | we want to have multiple output rows which share some of the fields. 398 | 399 | Secondly, we added a new field `:duration` which instead of being sourced from the input data 400 | is just a static value. We achieved it with the `virtual_source/1` macro 401 | which either takes a value or a function returning a value to be injected into the field. 402 | This is useful for us to be able to make the output structure as close to our database model as we can. 403 | 404 | > Note: There is a special case in the `parse_int/2` function to return nil on empty input, 405 | because `Integer.parse/2` will return an error given an empty string. 406 | 407 | Lastly, we added a special option to the first output row, called `skip_if`. 408 | The function we provided will be evaluated for each output row representing a one-month lease price, 409 | and if it returns `true` the row will not appear in the actual result. 410 | 411 | Using our latest schema and the CSV presented above, we get this result: 412 | ```elixir 413 | {:ok, [ 414 | {:ok, %{duration: 3, price: 3600, size: 60}}, 415 | {:ok, %{duration: 3, price: 3000, size: 50}}, 416 | {:ok, %{duration: 1, price: 1100, size: 50}}, 417 | {:ok, %{duration: 3, price: 2800, size: 40}}, 418 | {:ok, %{duration: 1, price: 1000, size: 40}} 419 | ]} 420 | ``` 421 | 422 | The last case is about multiple transformations on the same field. 423 | 424 | Our source file has changed again, so that it includes some non-integer prices. 
425 | We could just switch our usage of `Integer.parse/2` to `Decimal.parse/1`, 426 | but there is a catch: `Decimal.parse/1` expects `.` (dot) to be the decimal separator, 427 | and our source uses `,` (comma). 428 | For this reason we will need to first replace the commas with periods, and then convert. 429 | 430 | As the transformer we provide for the `:price` field is an arbitrary Elixir function, 431 | we could do both of those operations at once. 432 | That would work, but for schemas which have very complex transformation and validation rules, 433 | the function could get bloated quickly. 434 | 435 | The goal of this library is to avoid that complexity, and allow for easy understanding 436 | of the custom rules. This is why it's recommended to split the transformers into multiple functions. 437 | 438 | Let's create two functions: `parse_decimal/2` and `replace_commas/1`. 439 | 440 | > Note: To follow this example you will have to install the `Decimal` library, which you can find at [hex.pm/packages/decimal](https://hex.pm/packages/decimal). 441 | 442 | ```elixir 443 | def replace_commas(str) do 444 | {:ok, String.replace(str, ",", ".")} 445 | end 446 | 447 | def parse_decimal("", _), do: {:ok, nil} 448 | 449 | def parse_decimal(str, %{metadata: metadata, source_row: source_row}) do 450 | case Decimal.parse(str) do 451 | {decimal, ""} -> {:ok, decimal} 452 | :error -> {:error, "Error processing #{elem(metadata, 0)} #{elem(metadata, 1)} in row #{source_row}; '#{str}' given"} 453 | end 454 | end 455 | ``` 456 | 457 | We can now change our `:price` fields to use these functions: 458 | 459 | ```elixir 460 | # ... 461 | 462 | field :price do 463 | transform(&PricingSchema.replace_commas/1) 464 | transform(&PricingSchema.parse_decimal/2) 465 | 466 | source(["price", "1"]) 467 | end 468 | 469 | # ... 
470 | 471 | field :price do 472 | transform(&PricingSchema.replace_commas/1) 473 | transform(&PricingSchema.parse_decimal/2) 474 | 475 | source(["price", "3"]) 476 | end 477 | 478 | # ... 479 | ``` 480 | 481 | > Note: Different transformers for the same field or row may take different numbers of arguments, depending on whether the context is needed in the particular function. 482 | 483 | The final schema should look like this: 484 | 485 | ```elixir 486 | defmodule PricingSchema do 487 | use DataQuacker.Schema 488 | 489 | schema :pricing do 490 | row skip_if: (fn %{price: price} -> is_nil(price) end) do 491 | field :size do 492 | transform(&PricingSchema.parse_int/2) 493 | 494 | source(["apartment", "size"]) 495 | end 496 | 497 | field :duration do 498 | virtual_source(1) 499 | end 500 | 501 | field :price do 502 | transform(&PricingSchema.replace_commas/1) 503 | transform(&PricingSchema.parse_decimal/2) 504 | 505 | source(["price", "1"]) 506 | end 507 | end 508 | 509 | row do 510 | field :size do 511 | transform(&PricingSchema.parse_int/2) 512 | 513 | source(["apartment", "size"]) 514 | end 515 | 516 | field :duration do 517 | virtual_source(3) 518 | end 519 | 520 | field :price do 521 | transform(&PricingSchema.replace_commas/1) 522 | transform(&PricingSchema.parse_decimal/2) 523 | 524 | source(["price", "3"]) 525 | end 526 | end 527 | end 528 | 529 | def parse_int("", _), do: {:ok, nil} 530 | 531 | def parse_int(str, %{metadata: metadata, source_row: source_row}) do 532 | case Integer.parse(str) do 533 | {int, _} -> {:ok, int} 534 | :error -> {:error, "Error processing #{elem(metadata, 0)} #{elem(metadata, 1)} in row #{source_row}; '#{str}' given"} 535 | end 536 | end 537 | 538 | def replace_commas(str) do 539 | {:ok, String.replace(str, ",", ".")} 540 | end 541 | 542 | def parse_decimal("", _), do: {:ok, nil} 543 | 544 | def parse_decimal(str, %{metadata: metadata, source_row: source_row}) do 545 | case Decimal.parse(str) do 546 | {decimal, ""} -> {:ok, decimal} 
547 | :error -> {:error, "Error processing #{elem(metadata, 0)} #{elem(metadata, 1)} in row #{source_row}; '#{str}' given"} 548 | end 549 | end 550 | end 551 | ``` 552 | """ 553 | 554 | alias DataQuacker.Schema.State 555 | 556 | alias DataQuacker.SchemaError 557 | 558 | import DataQuacker.Schema.FunWrapper 559 | 560 | @doc false 561 | defmacro __using__(_opts) do 562 | quote do 563 | import unquote(__MODULE__) 564 | 565 | @state State.new() 566 | @schema_names [] 567 | end 568 | end 569 | 570 | @doc ~S""" 571 | Defines a schema and a `schema_structure/1` function 572 | which takes the schema name as the argument 573 | and returns the schema in a form that can be passed to a parser. 574 | 575 | Multiple schemas can be defined in a single module. 576 | 577 | The result structure is a map with the following types: 578 | ```elixir 579 | %{ 580 | __name__: atom(), 581 | rows: list(), 582 | matchers: list() 583 | } 584 | ``` 585 | 586 | The macro takes in a name and a block with which the rows, fields, etc. can be defined. 587 | The block must contain at least one row. Note, however, that if no row is explicitly specified, 588 | but at least one field is, the schema is assumed to have exactly one row which contains all of the fields. 589 | 590 | > Note: if one or many fields are present directly inside the schema, the row macro cannot be used explicitly. 591 | The same is true the other way around - if at least one row is specified explicitly, 592 | fields can only appear inside rows, not directly in the schema. 593 | 594 | Unlike `row/2` and `field/3`, the `schema/2` macro cannot have validators or transformers. 595 | If there is only one row, but it needs to define validators or transformers, 596 | the schema must define this row explicitly. 597 | """ 598 | defmacro schema(name, do: block) when is_atom(name) do 599 | quote do 600 | if unquote(name) in @schema_names do 601 | raise SchemaError, """ 602 | 603 | Invalid schema name. 
604 | There already exists a schema #{inspect(unquote(name))} 605 | on this module. 606 | """ 607 | end 608 | 609 | if not Enum.empty?(@state.cursor) do 610 | raise SchemaError, """ 611 | 612 | Invalid schema position. 613 | Schema can only appear as a top-level module macro 614 | (cannot be nested in other schemas). 615 | """ 616 | end 617 | 618 | @state State.new() 619 | @state State.register(@state, :schema, {unquote(name), %{}}) 620 | 621 | unquote(block) 622 | 623 | if Enum.empty?(@state.rows) do 624 | raise SchemaError, """ 625 | 626 | Invalid schema usage. 627 | Schema must have at least 628 | one row or one field. 629 | """ 630 | end 631 | 632 | if State.flagged?(@state, :has_loose_fields?) do 633 | @state State.update(@state, :row, %{fields: @state.fields}) 634 | 635 | @state State.cursor_exit(@state) 636 | end 637 | 638 | @state State.cursor_exit(@state) 639 | 640 | @state State.update(@state, :schema, %{ 641 | matchers: @state.matchers, 642 | rows: @state.rows 643 | }) 644 | 645 | def schema_structure(unquote(name)) do 646 | @state.schema 647 | end 648 | end 649 | end 650 | 651 | defmacro schema(name, do: _block) do 652 | quote do 653 | raise SchemaError, """ 654 | 655 | Invalid schema name. 656 | Must be an atom, #{inspect(unquote(name))} given. 657 | """ 658 | end 659 | end 660 | 661 | @doc ~S""" 662 | Defines an output row. 663 | Can only be used directly inside a schema, and only if the schema has no fields 664 | directly inside it. 665 | 666 | This macro takes in a keyword list of options, and a block within which the fields, 667 | validators and transformers can be specified. 668 | 669 | ## Options 670 | * `:skip_if` - a function of arity 1 or 2, which returns `true` or `false` given the value of the row and optionally the context; `true` means the row should be skipped from the output, `false` is a "noop" 671 | 672 | > Note: The order of execution is always: transformers, then validators, then "skip_if". 
673 | """ 674 | defmacro row(opts \\ [], do: block) do 675 | quote do 676 | if State.flagged?(@state, :has_loose_fields?) do 677 | raise SchemaError, """ 678 | 679 | Invalid row usage. 680 | Rows cannot appear in a schema 681 | if the schema has loose fields 682 | (fields appearing outside of any row). 683 | """ 684 | end 685 | 686 | if not State.cursor_at?(@state, :schema) do 687 | raise SchemaError, """ 688 | 689 | Invalid row position. 690 | Rows can only appear directly 691 | inside a schema. 692 | """ 693 | end 694 | 695 | @state State.clear_fields(@state) 696 | @state State.register( 697 | @state, 698 | :row, 699 | {length(@state.rows), %{skip_if: skip_if_opt(unquote(opts))}} 700 | ) 701 | 702 | unquote(block) 703 | 704 | @state State.update(@state, :row, %{fields: @state.fields}) 705 | 706 | if @state.fields == %{} do 707 | raise SchemaError, """ 708 | 709 | Invalid row usage. 710 | Rows must have at least one subfield. 711 | """ 712 | end 713 | 714 | @state State.cursor_exit(@state) 715 | end 716 | end 717 | 718 | @doc ~S""" 719 | Defines an output field. 720 | Can be used inside a schema, a row or another field. 721 | Can only be used directly inside a schema if the schema has no explicitly defined rows. 722 | Can only be used inside another field if that field has no source. 723 | 724 | This macro takes in a name, a keyword list of options, and a block within which the subfields or source, 725 | and validators and transformers can be specified. 726 | Can either specify exactly one source (virtual or regular) or subfields. 
727 | 728 | ## Options 729 | * `:skip_if` - a function of arity 1 or 2, which returns `true` or `false` given the value of the field and optionally the context; `true` means the field should be skipped from the output, `false` is a "noop" 730 | 731 | > Note: The order of execution is always: transformers, then validators, then "skip_if" 732 | """ 733 | defmacro field(_name, _opts \\ [], _) 734 | 735 | defmacro field(name, opts, do: block) when is_atom(name) do 736 | quote do 737 | if State.cursor_at?(@state, nil) do 738 | raise SchemaError, """ 739 | 740 | Invalid field position. 741 | Fields can only appear inside a schema, 742 | rows or other fields. 743 | """ 744 | end 745 | 746 | if State.cursor_at?(@state, :schema) and not Enum.empty?(@state.rows) do 747 | raise SchemaError, """ 748 | 749 | Invalid field usage. 750 | Fields cannot appear directly inside a schema 751 | if the schema explicitly declares rows. 752 | """ 753 | end 754 | 755 | if State.cursor_at?(@state, :schema) do 756 | @state State.flag(@state, :has_loose_fields?, true) 757 | @state State.register(@state, :row, {length(@state.rows), %{}}) 758 | end 759 | 760 | if State.cursor_at?(@state, :field) and State.get(@state, :field).__type__ == :sourced do 761 | raise SchemaError, """ 762 | 763 | Invalid field usage. 764 | A field can either have subfields or a source, 765 | but not both. 766 | """ 767 | end 768 | 769 | if State.cursor_at?(@state, :row) and Map.has_key?(@state.fields, unquote(name)) do 770 | raise SchemaError, """ 771 | 772 | Invalid field name. 773 | There already exists a field of the same name 774 | in this row. 775 | """ 776 | end 777 | 778 | if State.cursor_at?(@state, :field) and 779 | Map.has_key?(State.get(@state, :field).subfields, unquote(name)) do 780 | raise SchemaError, """ 781 | 782 | Invalid field name. 783 | There already exists a subfield of the same name 784 | in this field. 
785 | """ 786 | end 787 | 788 | if State.cursor_at?(@state, :field) do 789 | @state State.update(@state, :field, %{__type__: :wrapper}) 790 | end 791 | 792 | @state State.register( 793 | @state, 794 | :field, 795 | {unquote(name), %{skip_if: skip_if_opt(unquote(opts))}} 796 | ) 797 | 798 | unquote(block) 799 | 800 | if is_nil(State.get(@state, :field).__type__) do 801 | raise SchemaError, """ 802 | 803 | Invalid field usage. 804 | Fields must either have a source 805 | or at least one subfield. 806 | """ 807 | end 808 | 809 | @state State.cursor_exit(@state) 810 | end 811 | end 812 | 813 | defmacro field(name, _opts, do: _block) do 814 | quote do 815 | raise SchemaError, """ 816 | 817 | Invalid field name. 818 | Must be an atom, #{inspect(unquote(name))} given. 819 | """ 820 | end 821 | end 822 | 823 | @doc ~S""" 824 | Defines a source mapping from the input. 825 | Can only be used inside a field, and only if that field does not define any subfields 826 | or any other source. 827 | 828 | This macro takes in either a "needle" which can be string, a regex, a list of strings, 829 | or a function of arity 1 or 2. 
830 | 831 | ## Needle 832 | * when is a string - the downcased header name for a particular column must contain the downcased string given as the needle for the column to match 833 | * when is a regex - the header name for a particular column must match the needle for the column to match 834 | * when is a list of strings - the downcase header name for a particular column must contain all of the downcased elements given as the needle for the column to match 835 | * when is a function - given the header name for a particular column, and optionally the context, must return `true` for the column to match; the function must always return `true` or `false` 836 | """ 837 | defmacro source(needle) do 838 | {unquoted_needle, _} = Code.eval_quoted(needle) 839 | 840 | case unquoted_needle do 841 | string when is_binary(string) -> 842 | quote do 843 | source(fn column_name -> 844 | String.contains?(String.downcase(column_name), unquote(String.downcase(needle))) 845 | end) 846 | end 847 | 848 | list when is_list(list) -> 849 | quote do 850 | source(fn column_name -> 851 | column_name = String.downcase(column_name) 852 | 853 | Enum.all?( 854 | unquote(Enum.map(needle, &String.downcase(&1))), 855 | &String.contains?(column_name, &1) 856 | ) 857 | end) 858 | end 859 | 860 | %Regex{} -> 861 | quote do 862 | source(fn column_name -> 863 | Regex.match?(unquote(needle), column_name) 864 | end) 865 | end 866 | 867 | fun when is_function(fun) -> 868 | quote do 869 | if not State.cursor_at?(@state, :field) do 870 | raise SchemaError, """ 871 | 872 | Invalid source position. 873 | Sources can only appear inside fields. 874 | """ 875 | end 876 | 877 | if State.get(@state, :field).__type__ == :sourced do 878 | raise SchemaError, """ 879 | 880 | Invalid source usage. 881 | Only one source per field is allowed. 882 | """ 883 | end 884 | 885 | if State.get(@state, :field).__type__ == :wrapper do 886 | raise SchemaError, """ 887 | 888 | Invalid source usage. 
889 | A field can either have subfields or a source, 890 | but not both. 891 | """ 892 | end 893 | 894 | @state State.register(@state, :matcher, wrap_fun(unquote(needle), 1..2)) 895 | @state State.update(@state, :field, %{__type__: :sourced, source: State.target(@state)}) 896 | end 897 | 898 | _el -> 899 | quote do 900 | raise SchemaError, """ 901 | 902 | Invalid column source type. 903 | Must be a string, a regex expression or a function 904 | which can be used to match a column name. 905 | """ 906 | end 907 | end 908 | end 909 | 910 | @doc ~S""" 911 | Defines a value to be injected to a particular field. 912 | Can only be used inside a field, and only if that field does not define any subfields 913 | or any other source. 914 | 915 | This macro takes in either a literal value, or a function of arity 0 or 1. 916 | 917 | ## Value 918 | * when is a function - optionally given the context, can return any value to be injected inside the field 919 | * else - the value is injected inside the field "as is" 920 | """ 921 | defmacro virtual_source(value) do 922 | {unquoted_value, _} = Code.eval_quoted(value) 923 | 924 | case unquoted_value do 925 | fun when is_function(fun) -> 926 | quote do 927 | if not State.cursor_at?(@state, :field) do 928 | raise SchemaError, """ 929 | 930 | Invalid source position. 931 | Sources can only appear inside fields. 932 | """ 933 | end 934 | 935 | if State.get(@state, :field).__type__ == :sourced do 936 | raise SchemaError, """ 937 | 938 | Invalid source usage. 939 | Only one source per field is allowed. 940 | """ 941 | end 942 | 943 | if State.get(@state, :field).__type__ == :wrapper do 944 | raise SchemaError, """ 945 | 946 | Invalid source usage. 947 | A field can either have subfields or a source, 948 | but not both. 
949 | """ 950 | end 951 | 952 | @state State.update(@state, :field, %{ 953 | __type__: :sourced, 954 | source: wrap_fun(unquote(value), 0..1) 955 | }) 956 | end 957 | 958 | _el -> 959 | quote do 960 | virtual_source(fn -> unquote(value) end) 961 | end 962 | end 963 | end 964 | 965 | @doc ~S""" 966 | Defines a validator for a field or row. 967 | Can only be used inside a field or row. 968 | 969 | This macro takes in a function of arity 1 or 2, which will be applied to the value of the row or the field where the validator was defined. Multiple validators are allowed, and will be executed in the order in which they are defined. 970 | 971 | > Note: To use validators on a row, the row must be defined explicitly. Implicit rows cannot have validators. 972 | 973 | ## Fun 974 | * when is a function - given the field's or row's value and optionally the context, must return either `true`, `false`, `:ok`, `:error` or a tuple `{:error, any()}`, where `true` and `ok` are the success typing, and `false`, `:error` and `{:error, any()}` are the error typing; the entire output row will be an error row if any validation inside it or inside its fields fails 975 | """ 976 | defmacro validate(fun) do 977 | quote do 978 | validator = wrap_fun(unquote(fun), 1..2) 979 | 980 | cond do 981 | State.cursor_at?(@state, :row) -> 982 | validators = @state |> State.get(:row) |> Map.get(:validators) 983 | 984 | @state State.update(@state, :row, %{validators: validators ++ [validator]}) 985 | 986 | State.cursor_at?(@state, :field) -> 987 | validators = @state |> State.get(:field) |> Map.get(:validators) 988 | 989 | @state State.update(@state, :field, %{validators: validators ++ [validator]}) 990 | 991 | true -> 992 | raise SchemaError, """ 993 | 994 | Incorrect validator position. 995 | Validators can only appear 996 | inside rows or fields. 997 | """ 998 | end 999 | end 1000 | end 1001 | 1002 | @doc ~S""" 1003 | Defines a data transformer for a field or row. 
1004 | Can only be used inside a field or row. 1005 | 1006 | This macro takes in a function of arity 1 or 2, which will be applied to the value of the row or the field where the transformer was defined. Multiple transformers are allowed, and will be executed in the order in which they are defined. 1007 | 1008 | > Note: To use transformers on a row, the row must be defined explicitly. Implicit rows cannot have transformers. 1009 | 1010 | ## Fun 1011 | * when is a function - given the field's or row's value and optionally the context, must return either `{:ok, any()}`, `{:error, any()}` or `:error`, where `{:ok, any()}` is the success typing and `{:error, any()}`, and `:error` are the error typing; the second element of the success tuple is taken to be the new value of the row or field; the entire output row will be an error row if any validation inside it or inside its fields fails 1012 | """ 1013 | defmacro transform(fun) do 1014 | quote do 1015 | transformer = wrap_fun(unquote(fun), 1..2) 1016 | 1017 | cond do 1018 | State.cursor_at?(@state, :row) -> 1019 | transformers = @state |> State.get(:row) |> Map.get(:transformers) 1020 | 1021 | @state State.update(@state, :row, %{transformers: transformers ++ [transformer]}) 1022 | 1023 | State.cursor_at?(@state, :field) -> 1024 | transformers = @state |> State.get(:field) |> Map.get(:transformers) 1025 | 1026 | @state State.update(@state, :field, %{transformers: transformers ++ [transformer]}) 1027 | 1028 | true -> 1029 | raise SchemaError, """ 1030 | 1031 | Incorrect transformer position. 1032 | Transformers can only appear 1033 | inside rows or fields. 
1034 | """ 1035 | end 1036 | end 1037 | end 1038 | 1039 | @doc false 1040 | defmacro skip_if_opt(opts) do 1041 | {unquoted_opts, _} = Code.eval_quoted(opts) 1042 | 1043 | case Keyword.fetch(unquoted_opts, :skip_if) do 1044 | {:ok, fun} when is_function(fun) -> 1045 | quote do 1046 | wrap_fun(unquote(Keyword.get(opts, :skip_if)), 1..2) 1047 | end 1048 | 1049 | :error -> 1050 | quote do 1051 | nil 1052 | end 1053 | 1054 | _el -> 1055 | quote do 1056 | raise SchemaError, """ 1057 | 1058 | Invalid skip_if type 1059 | must be a function 1060 | with arity 1 or 2. 1061 | """ 1062 | end 1063 | end 1064 | end 1065 | end 1066 | -------------------------------------------------------------------------------- /lib/schema/state.ex: -------------------------------------------------------------------------------- 1 | # credo:disable-for-this-file Credo.Check.Refactor.AppendSingleItem 2 | defmodule DataQuacker.Schema.State do 3 | @moduledoc false 4 | 5 | alias DataQuacker.Schema.State 6 | 7 | defstruct cursor: [], flags: %{}, schema: %{}, matchers: [], rows: [], fields: %{} 8 | 9 | def new(), do: %State{} 10 | 11 | def clear_fields(state) do 12 | %State{state | fields: %{}} 13 | end 14 | 15 | def flag(%State{flags: flags} = state, flag, value) do 16 | flags = Map.put(flags, flag, value) 17 | 18 | %State{state | flags: flags} 19 | end 20 | 21 | def flagged?(%State{flags: flags}, flag) do 22 | Map.get(flags, flag, false) 23 | end 24 | 25 | def cursor_at?(%State{cursor: []}, type), do: is_nil(type) 26 | 27 | def cursor_at?(%State{cursor: cursor}, type) do 28 | elem(hd(cursor), 0) == type 29 | end 30 | 31 | def target(%State{cursor: cursor}) do 32 | target_from_cursor(cursor) 33 | end 34 | 35 | def cursor_exit(%State{cursor: cursor} = state, levels \\ 1) do 36 | %State{state | cursor: Enum.drop(cursor, levels)} 37 | end 38 | 39 | def register(%State{cursor: cursor} = state, :schema, {schema_name, schema}) do 40 | cursor = [{:schema, schema_name} | cursor] 41 | 42 | schema = 
Map.merge(new_schema(schema_name), schema) 43 | 44 | %State{state | schema: schema, cursor: cursor} 45 | end 46 | 47 | def register(%State{cursor: cursor, rows: rows} = state, :row, {row_index, row}) do 48 | cursor = [{:row, row_index} | cursor] 49 | 50 | row = Map.merge(new_row(row_index), row) 51 | rows = rows ++ [row] 52 | 53 | %State{state | rows: rows, cursor: cursor} 54 | end 55 | 56 | def register(%State{cursor: cursor, fields: fields} = state, :field, {field_name, field}) do 57 | cursor = [{:field, field_name} | cursor] 58 | needle = field_needle(cursor) 59 | 60 | field = Map.merge(new_field(field_name), field) 61 | fields = put_in(fields, Enum.reverse(needle), field) 62 | 63 | %State{state | fields: fields, cursor: cursor} 64 | end 65 | 66 | def register(%State{matchers: matchers, cursor: cursor} = state, :matcher, rule) do 67 | matcher = %{rule: rule, target: target_from_cursor(cursor)} 68 | matchers = [matcher | matchers] 69 | 70 | %State{state | matchers: matchers} 71 | end 72 | 73 | def update(%State{schema: existing_schema} = state, :schema, schema) do 74 | schema = Map.merge(existing_schema, schema) 75 | 76 | %State{state | schema: schema} 77 | end 78 | 79 | def update(%State{cursor: cursor, rows: rows} = state, :row, row) do 80 | index = elem(hd(cursor), 1) 81 | 82 | rows = List.update_at(rows, index, &Map.merge(&1, row)) 83 | 84 | %State{state | rows: rows} 85 | end 86 | 87 | def update(%State{cursor: cursor, fields: fields} = state, :field, field) do 88 | needle = field_needle(cursor) 89 | 90 | fields = update_in(fields, Enum.reverse(needle), &Map.merge(&1, field)) 91 | 92 | %State{state | fields: fields} 93 | end 94 | 95 | def get(%State{cursor: cursor, rows: rows}, :row) do 96 | Enum.at(rows, elem(hd(cursor), 1)) 97 | end 98 | 99 | def get(%State{cursor: cursor, fields: fields}, :field) do 100 | needle = field_needle(cursor) 101 | 102 | get_in(fields, Enum.reverse(needle)) 103 | end 104 | 105 | defp new_schema(name) do 106 | %{__name__: name, 
matchers: [], rows: []} 107 | end 108 | 109 | defp new_row(index) do 110 | %{__index__: index, fields: %{}, validators: [], transformers: [], skip_if: nil} 111 | end 112 | 113 | defp new_field(name) do 114 | %{ 115 | __name__: name, 116 | __type__: nil, 117 | source: nil, 118 | subfields: %{}, 119 | validators: [], 120 | transformers: [], 121 | skip_if: nil 122 | } 123 | end 124 | 125 | defp fields_cursor(cursor) do 126 | cursor |> Enum.split_while(&(elem(&1, 0) == :field)) |> elem(0) 127 | end 128 | 129 | defp target_from_cursor(cursor) do 130 | Enum.map(cursor, &elem(&1, 1)) 131 | end 132 | 133 | defp field_needle(cursor) do 134 | cursor |> fields_cursor() |> target_from_cursor() |> Enum.intersperse(:subfields) 135 | end 136 | end 137 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :data_quacker, 7 | version: "0.1.1", 8 | elixir: "~> 1.12", 9 | deps: deps(), 10 | elixirc_paths: elixirc_paths(Mix.env()), 11 | build_embedded: Mix.env() == :prod, 12 | start_permanent: Mix.env() == :prod, 13 | package: package(), 14 | name: "DataQuacker", 15 | description: 16 | "A library for validating transforming and parsing non-sandboxed data (e.g. 
CSV files)", 17 | source_url: "https://github.com/fiodorbaczynski/data_quacker", 18 | homepage_url: "https://github.com/fiodorbaczynski/data_quacker", 19 | docs: docs() 20 | ] 21 | end 22 | 23 | defp elixirc_paths(:test), do: ["lib", "test/support"] 24 | 25 | defp elixirc_paths(_), do: ["lib"] 26 | 27 | def application do 28 | [ 29 | extra_applications: [:logger, :crypto] 30 | ] 31 | end 32 | 33 | def package do 34 | [ 35 | name: "data_quacker", 36 | files: ["lib", ".formatter.exs", "mix.exs", "README*", "LICENSE*"], 37 | maintainers: ["Fiodor Baczyński"], 38 | licenses: ["Apache-2.0"], 39 | links: %{"GitHub" => "https://github.com/fiodorbaczynski/data_quacker"} 40 | ] 41 | end 42 | 43 | defp deps do 44 | [ 45 | {:credo, "~> 1.5", only: :dev, runtime: false}, 46 | {:dialyxir, "~> 1.1.0", only: :dev, runtime: false}, 47 | {:ex_doc, "~> 0.25", only: :dev, runtime: false}, 48 | {:csv, "~> 2.4"}, 49 | {:decimal, "~> 2.0", only: :test}, 50 | {:mox, "~> 1.0.0", only: :test} 51 | ] 52 | end 53 | 54 | defp docs() do 55 | [ 56 | main: "DataQuacker", 57 | extras: ["README.md"], 58 | source_url: "https://github.com/elixir-ecto/ecto", 59 | groups_for_modules: [ 60 | Schema: [ 61 | DataQuacker.Schema 62 | ], 63 | Parsing: [ 64 | DataQuacker, 65 | DataQuacker.Context 66 | ], 67 | Adapters: [ 68 | DataQuacker.Adapter, 69 | DataQuacker.Adapters.CSV, 70 | DataQuacker.Adapters.Identity 71 | ] 72 | ] 73 | ] 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, 3 | "credo": {:hex, :credo, "1.5.6", "e04cc0fdc236fefbb578e0c04bd01a471081616e741d386909e527ac146016c6", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, 
{:file_system, "~> 0.2.8", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "4b52a3e558bd64e30de62a648518a5ea2b6e3e5d2b164ef5296244753fc7eb17"}, 4 | "csv": {:hex, :csv, "2.4.1", "50e32749953b6bf9818dbfed81cf1190e38cdf24f95891303108087486c5925e", [:mix], [{:parallel_stream, "~> 1.0.4", [hex: :parallel_stream, repo: "hexpm", optional: false]}], "hexpm", "54508938ac67e27966b10ef49606e3ad5995d665d7fc2688efb3eab1307c9079"}, 5 | "decimal": {:hex, :decimal, "2.0.0", "a78296e617b0f5dd4c6caf57c714431347912ffb1d0842e998e9792b5642d697", [:mix], [], "hexpm", "34666e9c55dea81013e77d9d87370fe6cb6291d1ef32f46a1600230b1d44f577"}, 6 | "dialyxir": {:hex, :dialyxir, "1.1.0", "c5aab0d6e71e5522e77beff7ba9e08f8e02bad90dfbeffae60eaf0cb47e29488", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "07ea8e49c45f15264ebe6d5b93799d4dd56a44036cf42d0ad9c960bc266c0b9a"}, 7 | "earmark": {:hex, :earmark, "1.3.5", "0db71c8290b5bc81cb0101a2a507a76dca659513984d683119ee722828b424f6", [:mix], [], "hexpm", "762b999fd414fb41e297944228aa1de2cd4a3876a07f968c8b11d1e9a2190d07"}, 8 | "earmark_parser": {:hex, :earmark_parser, "1.4.15", "b29e8e729f4aa4a00436580dcc2c9c5c51890613457c193cc8525c388ccb2f06", [:mix], [], "hexpm", "044523d6438ea19c1b8ec877ec221b008661d3c27e3b848f4c879f500421ca5c"}, 9 | "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, 10 | "ex_doc": {:hex, :ex_doc, "0.25.1", "4b736fa38dc76488a937e5ef2944f5474f3eff921de771b25371345a8dc810bc", [:mix], [{:earmark_parser, "~> 1.4.0", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", 
"3200b0a69ddb2028365281fbef3753ea9e728683863d8cdaa96580925c891f67"}, 11 | "file_system": {:hex, :file_system, "0.2.10", "fb082005a9cd1711c05b5248710f8826b02d7d1784e7c3451f9c1231d4fc162d", [:mix], [], "hexpm", "41195edbfb562a593726eda3b3e8b103a309b733ad25f3d642ba49696bf715dc"}, 12 | "jason": {:hex, :jason, "1.2.2", "ba43e3f2709fd1aa1dce90aaabfd039d000469c05c56f0b8e31978e03fa39052", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "18a228f5f0058ee183f29f9eae0805c6e59d61c3b006760668d8d18ff0d12179"}, 13 | "makeup": {:hex, :makeup, "1.0.5", "d5a830bc42c9800ce07dd97fa94669dfb93d3bf5fcf6ea7a0c67b2e0e4a7f26c", [:mix], [{:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cfa158c02d3f5c0c665d0af11512fed3fba0144cf1aadee0f2ce17747fba2ca9"}, 14 | "makeup_elixir": {:hex, :makeup_elixir, "0.15.1", "b5888c880d17d1cc3e598f05cdb5b5a91b7b17ac4eaf5f297cb697663a1094dd", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.1", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "db68c173234b07ab2a07f645a5acdc117b9f99d69ebf521821d89690ae6c6ec8"}, 15 | "makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"}, 16 | "mox": {:hex, :mox, "1.0.0", "4b3c7005173f47ff30641ba044eb0fe67287743eec9bd9545e37f3002b0a9f8b", [:mix], [], "hexpm", "201b0a20b7abdaaab083e9cf97884950f8a30a1350a1da403b3145e213c6f4df"}, 17 | "nimble_parsec": {:hex, :nimble_parsec, "1.1.0", "3a6fca1550363552e54c216debb6a9e95bd8d32348938e13de5eda962c0d7f89", [:mix], [], "hexpm", "08eb32d66b706e913ff748f11694b17981c0b04a33ef470e33e11b3d3ac8f54b"}, 18 | "parallel_stream": {:hex, :parallel_stream, "1.0.6", 
"b967be2b23f0f6787fab7ed681b4c45a215a81481fb62b01a5b750fa8f30f76c", [:mix], [], "hexpm", "639b2e8749e11b87b9eb42f2ad325d161c170b39b288ac8d04c4f31f8f0823eb"}, 19 | } 20 | -------------------------------------------------------------------------------- /priv/plts/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /test/adapters/csv_test.exs: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.CSVAdapterTest do 2 | use ExUnit.Case, async: true 3 | 4 | import Mox 5 | 6 | alias DataQuacker.Adapters.CSV 7 | 8 | setup do 9 | {:ok, 10 | sample_source: [ 11 | ["a", "b", "c"], 12 | ["a1", "b1", "c1"], 13 | ["a2", "b2", "c2"], 14 | ["a3", "b3", "c3"] 15 | ]} 16 | end 17 | 18 | describe "parse_source/2" do 19 | test "given a local file path, should parse the source", %{ 20 | sample_source: [headers | rows] = sample_source 21 | } do 22 | expect(DataQuacker.MockFileManager, :stream!, fn "sample_path.csv" -> 23 | Stream.map(sample_source, &Enum.join(&1, ",")) 24 | end) 25 | 26 | assert CSV.parse_source("sample_path.csv", local?: true) == 27 | {:ok, %{headers: {:ok, headers}, rows: Enum.map(rows, &{:ok, &1})}} 28 | end 29 | 30 | test "with semicolon set as the separator given a local file path, should parse the source", 31 | %{sample_source: [headers | rows] = sample_source} do 32 | expect(DataQuacker.MockFileManager, :stream!, fn "sample_path.csv" -> 33 | Stream.map(sample_source, &Enum.join(&1, ";")) 34 | end) 35 | 36 | assert CSV.parse_source("sample_path.csv", local?: true, separator: ?;) == 37 | {:ok, %{headers: {:ok, headers}, rows: Enum.map(rows, &{:ok, &1})}} 38 | end 39 | 40 | test "given a remote file url, should parse the source", %{ 41 | sample_source: [headers | rows] = sample_source 42 | } do 43 | expect(DataQuacker.MockFileManager, :read_link!, fn "file_url.com" -> 
44 | Enum.map(sample_source, &Enum.join(&1, ",")) 45 | end) 46 | 47 | assert CSV.parse_source("file_url.com", local?: false) == 48 | {:ok, %{headers: {:ok, headers}, rows: Enum.map(rows, &{:ok, &1})}} 49 | end 50 | 51 | test "with semicolon set as the separator given a remote file url, should parse the source", 52 | %{sample_source: [headers | rows] = sample_source} do 53 | expect(DataQuacker.MockFileManager, :read_link!, fn "file_url.com" -> 54 | Enum.map(sample_source, &Enum.join(&1, ";")) 55 | end) 56 | 57 | assert CSV.parse_source("file_url.com", local?: false, separator: ?;) == 58 | {:ok, %{headers: {:ok, headers}, rows: Enum.map(rows, &{:ok, &1})}} 59 | end 60 | end 61 | 62 | # describe "get_headers/1" do 63 | # test "returns the value under the headers key, wrapped in an ':ok' tuple", %{ 64 | # sample_source: sample_source 65 | # } do 66 | # assert Identity.get_headers(sample_source) == {:ok, sample_source.headers} 67 | # end 68 | # end 69 | # 70 | # describe "get_rows/1" do 71 | # test "returns the value under the rows key, wrapped in an ':ok' tuple", %{ 72 | # sample_source: sample_source 73 | # } do 74 | # assert Identity.get_rows(sample_source) == {:ok, sample_source.rows} 75 | # end 76 | # end 77 | # 78 | # describe "get_row/1" do 79 | # test "returns the given row wrapped in an ':ok' tuple", %{sample_source: sample_source} do 80 | # row = Enum.random(sample_source.rows) 81 | # 82 | # assert Identity.get_row(row) == {:ok, row} 83 | # end 84 | # end 85 | end 86 | -------------------------------------------------------------------------------- /test/adapters/identity_test.exs: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.IdentityAdapterTest do 2 | use ExUnit.Case, async: true 3 | 4 | alias DataQuacker.Adapters.Identity 5 | 6 | setup do 7 | {:ok, 8 | sample_source: %{ 9 | headers: ["a", "b", "c"], 10 | rows: [ 11 | ["a1", "b1", "c1"], 12 | ["a2", "b2", "c2"], 13 | ["a3", "b3", "c3"] 14 | ] 15 | 
}} 16 | end 17 | 18 | describe "parse_source/2" do 19 | test "returns the source as-is, wrapped in an ':ok' tuple", %{sample_source: sample_source} do 20 | assert Identity.parse_source(sample_source, []) == {:ok, sample_source} 21 | end 22 | end 23 | 24 | describe "get_headers/1" do 25 | test "returns the value under the headers key, wrapped in an ':ok' tuple", %{ 26 | sample_source: sample_source 27 | } do 28 | assert Identity.get_headers(sample_source) == {:ok, sample_source.headers} 29 | end 30 | end 31 | 32 | describe "get_rows/1" do 33 | test "returns the value under the rows key, wrapped in an ':ok' tuple", %{ 34 | sample_source: sample_source 35 | } do 36 | assert Identity.get_rows(sample_source) == {:ok, sample_source.rows} 37 | end 38 | end 39 | 40 | describe "get_row/1" do 41 | test "returns the given row wrapped in an ':ok' tuple", %{sample_source: sample_source} do 42 | row = Enum.random(sample_source.rows) 43 | 44 | assert Identity.get_row(row) == {:ok, row} 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /test/data_quacker/skipper_test.exs: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.SkipperTest do 2 | use ExUnit.Case, async: true 3 | 4 | alias DataQuacker.Skipper 5 | 6 | alias DataQuacker.Context 7 | 8 | alias DataQuacker.Schema.WrappedFun 9 | 10 | describe "call/3" do 11 | setup do 12 | skipper_fun_1 = %WrappedFun{arity: 1, callable: fn value -> value == "abc" end} 13 | 14 | skipper_fun_2 = %WrappedFun{ 15 | arity: 2, 16 | callable: fn value, context -> value == context.support_data.expected_value end 17 | } 18 | 19 | incorrect_type_skipper_fun_1 = %WrappedFun{arity: 1, callable: fn _value -> :ok end} 20 | 21 | incorrect_type_skipper_fun_2 = %WrappedFun{ 22 | arity: 2, 23 | callable: fn _value, _context -> :ok end 24 | } 25 | 26 | {:ok, 27 | skipper_fun_1: skipper_fun_1, 28 | skipper_fun_2: skipper_fun_2, 29 | 
incorrect_type_skipper_fun_1: incorrect_type_skipper_fun_1, 30 | incorrect_type_skipper_fun_2: incorrect_type_skipper_fun_2} 31 | end 32 | 33 | test "given a skipper function with arity 1 and a value should apply the function to the value", 34 | %{skipper_fun_1: skipper_fun_1} do 35 | assert Skipper.call("abc", skipper_fun_1, %Context{}) == true 36 | assert Skipper.call("def", skipper_fun_1, %Context{}) == false 37 | end 38 | 39 | test "given a skipper function with arity 2 and a value should apply the function to the value with the context", 40 | %{skipper_fun_2: skipper_fun_2} do 41 | assert Skipper.call("abc", skipper_fun_2, %Context{support_data: %{expected_value: "abc"}}) == 42 | true 43 | 44 | assert Skipper.call("def", skipper_fun_2, %Context{support_data: %{expected_value: "abc"}}) == 45 | false 46 | end 47 | 48 | test "given a skipper function with arity 1 and an incorrect return type should raise", 49 | %{incorrect_type_skipper_fun_1: incorrect_type_skipper_fun_1} do 50 | assert_raise(RuntimeError, ~r/skipper.+field.+abc.+incorrect.+value/si, fn -> 51 | Skipper.call("abc", incorrect_type_skipper_fun_1, %Context{metadata: {:field, :abc}}) 52 | end) 53 | end 54 | 55 | test "given a skipper function with arity 2 and an incorrect return type should raise", 56 | %{incorrect_type_skipper_fun_2: incorrect_type_skipper_fun_2} do 57 | assert_raise(RuntimeError, ~r/skipper.+field.+abc.+incorrect.+value/si, fn -> 58 | Skipper.call("abc", incorrect_type_skipper_fun_2, %Context{metadata: {:field, :abc}}) 59 | end) 60 | end 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /test/examples/pond_example_test.exs: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Examples.PondExampleTest do 2 | use DataQuacker.Case, async: true 3 | 4 | alias DataQuacker.Adapters.Identity 5 | 6 | defmodule PondSchema do 7 | use DataQuacker.Schema 8 | 9 | schema :pond_example_1 do 
10 | field :type do 11 | source("type") 12 | end 13 | 14 | field :colour do 15 | source(~r/colou?r/i) 16 | end 17 | 18 | field :age do 19 | source("age") 20 | end 21 | end 22 | 23 | schema :pond_example_2 do 24 | field :type do 25 | validate(fn type -> type in ["Mallard", "Domestic", "Mandarin"] end) 26 | 27 | source("type") 28 | end 29 | 30 | field :colour do 31 | source(~r/colou?r/i) 32 | end 33 | 34 | field :age do 35 | transform(fn age_str -> 36 | case Integer.parse(age_str) do 37 | {age_int, _} -> {:ok, age_int} 38 | :error -> :error 39 | end 40 | end) 41 | 42 | source("age") 43 | end 44 | end 45 | end 46 | 47 | describe "pond example" do 48 | @tag :integration 49 | test "should parse sample data given the pond example 1 schema" do 50 | assert {:ok, [row1, row2, row3]} = 51 | DataQuacker.parse( 52 | %{ 53 | headers: ["Type", "Colour", "Age"], 54 | rows: [ 55 | ["Mallard", "green", "3"], 56 | ["Domestic", "white", "2"], 57 | ["Mandarin", "multi-coloured", "4"] 58 | ] 59 | }, 60 | PondSchema.schema_structure(:pond_example_1), 61 | nil, 62 | adapter: Identity 63 | ) 64 | 65 | assert row1 == {:ok, %{type: "Mandarin", colour: "multi-coloured", age: "4"}} 66 | assert row2 == {:ok, %{type: "Domestic", colour: "white", age: "2"}} 67 | assert row3 == {:ok, %{type: "Mallard", colour: "green", age: "3"}} 68 | end 69 | 70 | @tag :integration 71 | test "should parse sample data given the pond example 2 schema" do 72 | assert {:error, [row1, row2, row3, row4, row5]} = 73 | DataQuacker.parse( 74 | %{ 75 | headers: ["Type", "Colour", "Age"], 76 | rows: [ 77 | ["Mallard", "green", "3"], 78 | ["Domestic", "white", "2"], 79 | ["Mandarin", "multi-coloured", "4"], 80 | ["Mystery", "golden", "100"], 81 | ["Black", "black", "Infinity"] 82 | ] 83 | }, 84 | PondSchema.schema_structure(:pond_example_2), 85 | nil, 86 | adapter: Identity 87 | ) 88 | 89 | assert row1 == :error 90 | assert row2 == :error 91 | assert row3 == {:ok, %{type: "Mandarin", colour: "multi-coloured", age: 4}} 92 | 
assert row4 == {:ok, %{type: "Domestic", colour: "white", age: 2}} 93 | assert row5 == {:ok, %{type: "Mallard", colour: "green", age: 3}} 94 | end 95 | end 96 | end 97 | -------------------------------------------------------------------------------- /test/examples/pricing_example_test.exs: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Examples.PricingExampleTest do 2 | use DataQuacker.Case, async: true 3 | 4 | alias DataQuacker.Adapters.Identity 5 | 6 | defmodule PricingSchema do 7 | use DataQuacker.Schema 8 | 9 | schema :pricing_example_1 do 10 | field :size do 11 | transform(fn size -> 12 | case Integer.parse(size) do 13 | {size_int, _} -> {:ok, size_int} 14 | :error -> {:error, "Invalid value #{size} given"} 15 | end 16 | end) 17 | 18 | source("Apartment/flat size (in m^2)") 19 | end 20 | 21 | field :price do 22 | transform(fn price -> 23 | case Integer.parse(price) do 24 | {price_int, _} -> {:ok, price_int} 25 | :error -> {:error, "Invalid value #{price} given"} 26 | end 27 | end) 28 | 29 | source("Price per 1 month") 30 | end 31 | end 32 | 33 | schema :pricing_example_2 do 34 | field :size do 35 | transform(&PricingSchema.parse_int_example_2/1) 36 | 37 | source("Apartment/flat size (in m^2)") 38 | end 39 | 40 | field :price do 41 | transform(&PricingSchema.parse_int_example_2/1) 42 | 43 | source("Price per 1 month") 44 | end 45 | end 46 | 47 | schema :pricing_example_3 do 48 | field :size do 49 | transform(&PricingSchema.parse_int_example_3/2) 50 | 51 | source(["apartment", "size"]) 52 | end 53 | 54 | field :price do 55 | transform(&PricingSchema.parse_int_example_3/2) 56 | 57 | source(["price", "1"]) 58 | end 59 | end 60 | 61 | schema :pricing_example_4 do 62 | row skip_if: fn %{price: price} -> is_nil(price) end do 63 | field :size do 64 | transform(&PricingSchema.parse_int_example_4/2) 65 | 66 | source(["apartment", "size"]) 67 | end 68 | 69 | field :duration do 70 | virtual_source(1) 71 | end 72 
| 73 | field :price do 74 | transform(&PricingSchema.parse_int_example_4/2) 75 | 76 | source(["price", "1"]) 77 | end 78 | end 79 | 80 | row do 81 | field :size do 82 | transform(&PricingSchema.parse_int_example_4/2) 83 | 84 | source(["apartment", "size"]) 85 | end 86 | 87 | field :duration do 88 | virtual_source(3) 89 | end 90 | 91 | field :price do 92 | transform(&PricingSchema.parse_int_example_4/2) 93 | 94 | source(["price", "3"]) 95 | end 96 | end 97 | end 98 | 99 | schema :pricing_example_5 do 100 | row skip_if: fn %{price: price} -> is_nil(price) end do 101 | field :size do 102 | transform(&PricingSchema.parse_int_example_5/2) 103 | 104 | source(["apartment", "size"]) 105 | end 106 | 107 | field :duration do 108 | virtual_source(1) 109 | end 110 | 111 | field :price do 112 | transform(&PricingSchema.replace_commas/1) 113 | transform(&PricingSchema.parse_decimal/2) 114 | 115 | source(["price", "1"]) 116 | end 117 | end 118 | 119 | row do 120 | field :size do 121 | transform(&PricingSchema.parse_int_example_5/2) 122 | 123 | source(["apartment", "size"]) 124 | end 125 | 126 | field :duration do 127 | virtual_source(3) 128 | end 129 | 130 | field :price do 131 | transform(&PricingSchema.replace_commas/1) 132 | transform(&PricingSchema.parse_decimal/2) 133 | 134 | source(["price", "3"]) 135 | end 136 | end 137 | end 138 | 139 | def parse_int_example_2(str) do 140 | case Integer.parse(str) do 141 | {int, _} -> {:ok, int} 142 | :error -> {:error, "Invalid value #{str} given"} 143 | end 144 | end 145 | 146 | def parse_int_example_3(str, %{metadata: metadata, source_row: source_row}) do 147 | case Integer.parse(str) do 148 | {int, _} -> 149 | {:ok, int} 150 | 151 | :error -> 152 | {:error, 153 | "Error processing #{elem(metadata, 0)} #{elem(metadata, 1)} in row #{source_row}; '#{str}' given"} 154 | end 155 | end 156 | 157 | def parse_int_example_4("", _context), do: {:ok, nil} 158 | 159 | def parse_int_example_4(str, %{metadata: metadata, source_row: source_row}) do 
160 | case Integer.parse(str) do 161 | {int, _} -> 162 | {:ok, int} 163 | 164 | :error -> 165 | {:error, 166 | "Error processing #{elem(metadata, 0)} #{elem(metadata, 1)} in row #{source_row}; '#{str}' given"} 167 | end 168 | end 169 | 170 | def parse_int_example_5("", _context), do: {:ok, nil} 171 | 172 | def parse_int_example_5(str, %{metadata: metadata, source_row: source_row}) do 173 | case Integer.parse(str) do 174 | {int, _} -> 175 | {:ok, int} 176 | 177 | :error -> 178 | {:error, 179 | "Error processing #{elem(metadata, 0)} #{elem(metadata, 1)} in row #{source_row}; '#{str}' given"} 180 | end 181 | end 182 | 183 | def replace_commas(str) do 184 | {:ok, String.replace(str, ",", ".")} 185 | end 186 | 187 | def parse_decimal("", _context), do: {:ok, nil} 188 | 189 | def parse_decimal(str, %{metadata: metadata, source_row: source_row}) do 190 | case Decimal.parse(str) do 191 | {decimal, ""} -> 192 | {:ok, decimal} 193 | 194 | :error -> 195 | {:error, 196 | "Error processing #{elem(metadata, 0)} #{elem(metadata, 1)} in row #{source_row}; '#{str}' given"} 197 | end 198 | end 199 | end 200 | 201 | describe "pricing example" do 202 | @tag :integration 203 | test "should parse sample data given the pricing example 1 schema" do 204 | assert {:ok, [row1, row2]} = 205 | DataQuacker.parse( 206 | %{ 207 | headers: ["Apartment/flat size (in m^2)", "Price per 1 month"], 208 | rows: [ 209 | ["40", "1000"], 210 | ["50", "1100"] 211 | ] 212 | }, 213 | PricingSchema.schema_structure(:pricing_example_1), 214 | nil, 215 | adapter: Identity 216 | ) 217 | 218 | assert row1 == {:ok, %{size: 50, price: 1100}} 219 | assert row2 == {:ok, %{size: 40, price: 1000}} 220 | end 221 | 222 | @tag :integration 223 | test "should parse sample data given the pricing example 2 schema" do 224 | assert {:ok, [row1, row2]} = 225 | DataQuacker.parse( 226 | %{ 227 | headers: ["Apartment/flat size (in m^2)", "Price per 1 month"], 228 | rows: [ 229 | ["40", "1000"], 230 | ["50", "1100"] 231 | ] 232 | }, 
233 | PricingSchema.schema_structure(:pricing_example_2), 234 | nil, 235 | adapter: Identity 236 | ) 237 | 238 | assert row1 == {:ok, %{size: 50, price: 1100}} 239 | assert row2 == {:ok, %{size: 40, price: 1000}} 240 | end 241 | 242 | @tag :integration 243 | test "should parse sample data given the pricing example 3 schema" do 244 | assert {:error, [row1, row2, row3, row4]} = 245 | DataQuacker.parse( 246 | %{ 247 | headers: ["Apartment or flat size", "Price for 1 month"], 248 | rows: [ 249 | ["40", "1000"], 250 | ["50", "1100"], 251 | ["50", "a lot of $$$"], 252 | ["huge", "1000000"] 253 | ] 254 | }, 255 | PricingSchema.schema_structure(:pricing_example_3), 256 | nil, 257 | adapter: Identity 258 | ) 259 | 260 | assert row1 == {:error, "Error processing field size in row 4; 'huge' given"} 261 | assert row2 == {:error, "Error processing field price in row 3; 'a lot of $$$' given"} 262 | assert row3 == {:ok, %{size: 50, price: 1100}} 263 | assert row4 == {:ok, %{size: 40, price: 1000}} 264 | end 265 | 266 | @tag :integration 267 | test "should parse sample data given the pricing example 4 schema" do 268 | assert {:ok, [row1, row2, row3, row4, row5]} = 269 | DataQuacker.parse( 270 | %{ 271 | headers: ["Apartment or flat size", "Price for 1 month", "Price per 3 months"], 272 | rows: [ 273 | ["40", "1000", "2800"], 274 | ["50", "1100", "3000"], 275 | ["60", "", "3600"] 276 | ] 277 | }, 278 | PricingSchema.schema_structure(:pricing_example_4), 279 | nil, 280 | adapter: Identity 281 | ) 282 | 283 | assert row1 == {:ok, %{duration: 3, price: 3600, size: 60}} 284 | assert row2 == {:ok, %{duration: 3, price: 3000, size: 50}} 285 | assert row3 == {:ok, %{duration: 1, price: 1100, size: 50}} 286 | assert row4 == {:ok, %{duration: 3, price: 2800, size: 40}} 287 | assert row5 == {:ok, %{duration: 1, price: 1000, size: 40}} 288 | end 289 | 290 | @tag :integration 291 | test "should parse sample data given the pricing example 5 schema" do 292 | assert {:ok, [row1, row2, row3, row4, 
row5]} = 293 | DataQuacker.parse( 294 | %{ 295 | headers: ["Apartment or flat size", "Price for 1 month", "Price per 3 months"], 296 | rows: [ 297 | ["40", "999,99", "2799,99"], 298 | ["50", "1099,99", "2999,99"], 299 | ["60", "", "3599,99"] 300 | ] 301 | }, 302 | PricingSchema.schema_structure(:pricing_example_5), 303 | nil, 304 | adapter: Identity 305 | ) 306 | 307 | assert row1 == {:ok, %{duration: 3, price: Decimal.new("3599.99"), size: 60}} 308 | assert row2 == {:ok, %{duration: 3, price: Decimal.new("2999.99"), size: 50}} 309 | assert row3 == {:ok, %{duration: 1, price: Decimal.new("1099.99"), size: 50}} 310 | assert row4 == {:ok, %{duration: 3, price: Decimal.new("2799.99"), size: 40}} 311 | assert row5 == {:ok, %{duration: 1, price: Decimal.new("999.99"), size: 40}} 312 | end 313 | end 314 | end 315 | -------------------------------------------------------------------------------- /test/examples/students_example_test.exs: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Examples.StudentsExampleTest do 2 | use DataQuacker.Case, async: true 3 | 4 | alias DataQuacker.Adapters.Identity 5 | 6 | defmodule StudentsSchema do 7 | use DataQuacker.Schema 8 | 9 | schema :students_example_1 do 10 | field :first_name do 11 | source("first name") 12 | end 13 | 14 | field :last_name do 15 | source("last name") 16 | end 17 | 18 | field :age do 19 | source("age") 20 | end 21 | 22 | field :favourite_subject do 23 | source("favourite subject") 24 | end 25 | end 26 | 27 | schema :students_example_2 do 28 | field :first_name do 29 | source("first name") 30 | end 31 | 32 | field :last_name do 33 | source("last name") 34 | end 35 | 36 | field :age do 37 | transform(fn age -> 38 | case Integer.parse(age) do 39 | {age_int, _} -> {:ok, age_int} 40 | :error -> {:error, "Invalid value #{age} given"} 41 | end 42 | end) 43 | 44 | source("age") 45 | end 46 | 47 | field :favourite_subject do 48 | validate(fn subj -> subj in 
["Maths", "Physics", "Programming"] end) 49 | 50 | source("favourite subject") 51 | end 52 | end 53 | 54 | schema :students_example_4 do 55 | field :full_name do 56 | transform(fn %{first_name: first_name, last_name: last_name} -> 57 | {:ok, "#{first_name} #{last_name}"} 58 | end) 59 | 60 | field :first_name do 61 | source("first name") 62 | end 63 | 64 | field :last_name do 65 | source("last name") 66 | end 67 | end 68 | 69 | field :age do 70 | transform(fn age -> 71 | case Integer.parse(age) do 72 | {age_int, _} -> {:ok, age_int} 73 | :error -> {:error, "Invalid value #{age} given"} 74 | end 75 | end) 76 | 77 | source("age") 78 | end 79 | 80 | field :favourite_subject do 81 | validate(fn subj, context -> 82 | case subj in context.support_data.valid_subjects do 83 | true -> 84 | :ok 85 | 86 | false -> 87 | {:error, 88 | "Invalid favourite subject in row ##{context.source_row}, must be one of #{inspect(context.support_data.valid_subjects)}"} 89 | end 90 | end) 91 | 92 | source("favourite subject") 93 | end 94 | end 95 | end 96 | 97 | describe "students example" do 98 | @tag :integration 99 | test "should parse sample data given the students example 1 schema" do 100 | assert {:ok, [row1, row2, row3]} = 101 | DataQuacker.parse( 102 | %{ 103 | headers: ["First name", "Last name", "Age", "Favourite subject"], 104 | rows: [ 105 | ["John", "Smith", "19", "Maths"], 106 | ["Adam", "Johnson", "18", "Physics"], 107 | ["Quackers", "the Duck", "1", "Programming"] 108 | ] 109 | }, 110 | StudentsSchema.schema_structure(:students_example_1), 111 | %{valid_subjects: ["Maths", "Physics", "Programming"]}, 112 | adapter: Identity 113 | ) 114 | 115 | assert row1 == 116 | {:ok, 117 | %{ 118 | age: "1", 119 | favourite_subject: "Programming", 120 | first_name: "Quackers", 121 | last_name: "the Duck" 122 | }} 123 | 124 | assert row2 == 125 | {:ok, 126 | %{ 127 | age: "18", 128 | favourite_subject: "Physics", 129 | first_name: "Adam", 130 | last_name: "Johnson" 131 | }} 132 | 133 | assert 
row3 == 134 | {:ok, 135 | %{age: "19", favourite_subject: "Maths", first_name: "John", last_name: "Smith"}} 136 | end 137 | 138 | @tag :integration 139 | test "should parse sample data given the students example 2 schema" do 140 | assert {:ok, [row1, row2, row3]} = 141 | DataQuacker.parse( 142 | %{ 143 | headers: ["First name", "Last name", "Age", "Favourite subject"], 144 | rows: [ 145 | ["John", "Smith", "19", "Maths"], 146 | ["Adam", "Johnson", "18", "Physics"], 147 | ["Quackers", "the Duck", "1", "Programming"] 148 | ] 149 | }, 150 | StudentsSchema.schema_structure(:students_example_2), 151 | %{valid_subjects: ["Maths", "Physics", "Programming"]}, 152 | adapter: Identity 153 | ) 154 | 155 | assert row1 == 156 | {:ok, 157 | %{ 158 | age: 1, 159 | favourite_subject: "Programming", 160 | first_name: "Quackers", 161 | last_name: "the Duck" 162 | }} 163 | 164 | assert row2 == 165 | {:ok, 166 | %{age: 18, favourite_subject: "Physics", first_name: "Adam", last_name: "Johnson"}} 167 | 168 | assert row3 == 169 | {:ok, 170 | %{age: 19, favourite_subject: "Maths", first_name: "John", last_name: "Smith"}} 171 | end 172 | 173 | @tag :integration 174 | test "should parse sample data given the students example 4 schema" do 175 | assert {:error, [row1, row2, row3, row4]} = 176 | DataQuacker.parse( 177 | %{ 178 | headers: ["First name", "Last name", "Age", "Favourite subject"], 179 | rows: [ 180 | ["John", "Smith", "19", "Maths"], 181 | ["Adam", "Johnson", "18", "Physics"], 182 | ["Quackers", "the Duck", "1", "Programming"], 183 | ["Mat", "Savage", "100", "None"] 184 | ] 185 | }, 186 | StudentsSchema.schema_structure(:students_example_4), 187 | %{valid_subjects: ["Maths", "Physics", "Programming"]}, 188 | adapter: Identity 189 | ) 190 | 191 | assert row1 == 192 | {:error, 193 | "Invalid favourite subject in row #4, must be one of [\"Maths\", \"Physics\", \"Programming\"]"} 194 | 195 | assert row2 == 196 | {:ok, %{age: 1, favourite_subject: "Programming", full_name: "Quackers 
the Duck"}} 197 | 198 | assert row3 == {:ok, %{age: 18, favourite_subject: "Physics", full_name: "Adam Johnson"}} 199 | assert row4 == {:ok, %{age: 19, favourite_subject: "Maths", full_name: "John Smith"}} 200 | end 201 | end 202 | end 203 | -------------------------------------------------------------------------------- /test/helpers_tests/fun_wrapper_test.exs: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.FunWrapperHelperTest do 2 | use DataQuacker.Case, async: true 3 | 4 | alias DataQuacker.Schema.WrappedFun 5 | alias DataQuacker.SchemaError 6 | 7 | defmodule SampleWrappedFunctions do 8 | import DataQuacker.Schema.FunWrapper 9 | 10 | @fun0 wrap_fun(fn -> 11 | "no args" 12 | end) 13 | 14 | @fun1 wrap_fun(fn arg1 -> 15 | arg1 16 | end) 17 | 18 | @fun2 wrap_fun(fn arg1, arg2 -> 19 | {arg1, arg2} 20 | end) 21 | 22 | def wrapped_fun0 do 23 | @fun0 24 | end 25 | 26 | def wrapped_fun1 do 27 | @fun1 28 | end 29 | 30 | def wrapped_fun2 do 31 | @fun2 32 | end 33 | end 34 | 35 | describe "wrap_fun/2" do 36 | test "should wrap a function and return a wrapped function struct" do 37 | assert %WrappedFun{callable: fun0, arity: 0} = SampleWrappedFunctions.wrapped_fun0() 38 | 39 | assert fun0.() == "no args" 40 | 41 | assert %WrappedFun{callable: fun1, arity: 1} = SampleWrappedFunctions.wrapped_fun1() 42 | 43 | assert fun1.("a") == "a" 44 | 45 | assert %WrappedFun{callable: fun2, arity: 2} = SampleWrappedFunctions.wrapped_fun2() 46 | 47 | assert fun2.("a", "b") == {"a", "b"} 48 | end 49 | 50 | test "given a function and expected numeric arity should not compile if the function's arity does not match the assertion" do 51 | assert_raise(SchemaError, ~r/unexpected.+arity/si, fn -> 52 | Code.eval_string( 53 | """ 54 | defmodule TestFunWrapper do 55 | import DataQuacker.Schema.FunWrapper 56 | 57 | @fun wrap_fun(fn _ -> nil end, 2) 58 | end 59 | """, 60 | [], 61 | __ENV__ 62 | ) 63 | end) 64 | 65 | assert_raise(SchemaError, 
~r/unexpected.+arity/si, fn -> 66 | Code.eval_string( 67 | """ 68 | defmodule TestFunWrapper do 69 | import DataQuacker.Schema.FunWrapper 70 | 71 | @fun wrap_fun(fn _, _ -> nil end, 1) 72 | end 73 | """, 74 | [], 75 | __ENV__ 76 | ) 77 | end) 78 | end 79 | 80 | test "given a function and expected range of arity should not compile if the function's arity does not match the assertion" do 81 | assert_raise(SchemaError, ~r/unexpected.+arity/si, fn -> 82 | Code.eval_string( 83 | """ 84 | defmodule TestFunWrapper do 85 | import DataQuacker.Schema.FunWrapper 86 | 87 | @fun wrap_fun(fn -> nil end, 1..2) 88 | end 89 | """, 90 | [], 91 | __ENV__ 92 | ) 93 | end) 94 | 95 | assert_raise(SchemaError, ~r/unexpected.+arity/si, fn -> 96 | Code.eval_string( 97 | """ 98 | defmodule TestFunWrapper do 99 | import DataQuacker.Schema.FunWrapper 100 | 101 | @fun wrap_fun(fn _, _, _ -> nil end, 1..2) 102 | end 103 | """, 104 | [], 105 | __ENV__ 106 | ) 107 | end) 108 | end 109 | end 110 | end 111 | -------------------------------------------------------------------------------- /test/schema/state_test.exs: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Schema.StateTest do 2 | use ExUnit.Case, async: true 3 | 4 | alias DataQuacker.Schema.State 5 | 6 | describe "new/0" do 7 | test "should return an empty State struct" do 8 | assert State.new() == %State{ 9 | cursor: [], 10 | flags: %{}, 11 | schema: %{}, 12 | matchers: [], 13 | rows: [], 14 | fields: %{} 15 | } 16 | end 17 | end 18 | 19 | describe "clear_fields/1" do 20 | setup do 21 | {:ok, state: %State{fields: %{a: 1, b: 2, c: 3}}} 22 | end 23 | 24 | test "should clear the fields", %{state: state} do 25 | state = State.clear_fields(state) 26 | 27 | assert state.fields == %{} 28 | end 29 | end 30 | 31 | describe "flag/3" do 32 | setup do 33 | {:ok, state: %State{flags: %{a: true, b: false}}} 34 | end 35 | 36 | test "should put a flag with a value", %{state: state} do 37 | state = 
State.flag(state, :c, true) 38 | 39 | assert state.flags.c == true 40 | end 41 | 42 | test "should replace the value of an already existing flag", %{state: state} do 43 | state = State.flag(state, :b, true) 44 | 45 | assert state.flags.b == true 46 | end 47 | end 48 | 49 | describe "flagged?/2" do 50 | setup do 51 | {:ok, state: %State{flags: %{a: true, b: false}}} 52 | end 53 | 54 | test "should get the value of a flag", %{state: state} do 55 | assert State.flagged?(state, :a) == true 56 | assert State.flagged?(state, :b) == false 57 | end 58 | 59 | test "should return false if a flag does not exist", %{state: state} do 60 | assert State.flagged?(state, :c) == false 61 | end 62 | end 63 | 64 | describe "cursor_at/2" do 65 | setup do 66 | {:ok, 67 | empty_cursor_state: %State{cursor: []}, 68 | state: %State{cursor: [{:field, :sample_field}, {:row, 0}]}} 69 | end 70 | 71 | test "given a state with an empty cursor and compare the needle with nil", %{ 72 | empty_cursor_state: empty_cursor_state 73 | } do 74 | assert State.cursor_at?(empty_cursor_state, nil) == true 75 | assert State.cursor_at?(empty_cursor_state, 123) == false 76 | assert State.cursor_at?(empty_cursor_state, "abc") == false 77 | end 78 | 79 | test "given a cursor should compare the latest pointer's type to the needle", %{ 80 | state: state 81 | } do 82 | assert State.cursor_at?(state, :field) == true 83 | assert State.cursor_at?(state, :row) == false 84 | end 85 | end 86 | 87 | describe "target/1" do 88 | setup do 89 | {:ok, state: %State{cursor: [{:field, :abc}, {:row, 0}, {:schema, :def}]}} 90 | end 91 | 92 | test "should return a list of values at subsequent cursor entries (without the types)", %{ 93 | state: state 94 | } do 95 | assert State.target(state) == [:abc, 0, :def] 96 | end 97 | end 98 | 99 | describe "cursor_exit/1" do 100 | setup do 101 | {:ok, state: %State{cursor: [{:field, :abc}, {:row, 0}, {:schema, :def}]}} 102 | end 103 | 104 | test "should drop the cursor's head", %{state: state} 
do 105 | assert State.cursor_exit(state) == %State{cursor: [{:row, 0}, {:schema, :def}]} 106 | end 107 | 108 | test "given the exit level should the cursor's first n elements", %{state: state} do 109 | assert State.cursor_exit(state, 2) == %State{cursor: [{:schema, :def}]} 110 | end 111 | end 112 | 113 | describe "register/3" do 114 | setup do 115 | blank_state = %State{} 116 | state_with_schema = State.register(blank_state, :schema, {:abc, %{}}) 117 | state_with_row = State.register(state_with_schema, :row, {0, %{}}) 118 | state_with_field = State.register(state_with_row, :field, {:def, %{}}) 119 | 120 | {:ok, 121 | blank_state: blank_state, 122 | state_with_schema: state_with_schema, 123 | state_with_row: state_with_row, 124 | state_with_field: state_with_field} 125 | end 126 | 127 | test "given a schema should add the schema merged with the default to the state", %{ 128 | blank_state: state 129 | } do 130 | assert %State{cursor: [{:schema, :abc}], schema: %{__name__: :abc, matchers: [], rows: []}} = 131 | State.register(state, :schema, {:abc, %{}}) 132 | end 133 | 134 | test "given a row should add the row merged with the default to the state", %{ 135 | state_with_schema: state 136 | } do 137 | assert %State{ 138 | cursor: [{:row, 0}, {:schema, :abc}], 139 | rows: [ 140 | %{__index__: 0, fields: %{}, transformers: [], skip_if: nil, validators: []} 141 | ] 142 | } = State.register(state, :row, {0, %{}}) 143 | end 144 | 145 | test "given a field should add the field merged with the default to the state", %{ 146 | state_with_row: state 147 | } do 148 | assert %State{ 149 | cursor: [{:field, :def}, {:row, 0}, {:schema, :abc}], 150 | fields: %{ 151 | def: %{ 152 | __name__: :def, 153 | __type__: nil, 154 | transformers: [], 155 | skip_if: nil, 156 | source: nil, 157 | subfields: %{}, 158 | validators: [] 159 | } 160 | } 161 | } = State.register(state, :field, {:def, %{}}) 162 | end 163 | 164 | test "given a field when the cursor is already at a field should add the 
field merged with the default to the state as a subfield", 165 | %{ 166 | state_with_field: state 167 | } do 168 | assert %State{ 169 | cursor: [{:field, :ghi}, {:field, :def}, {:row, 0}, {:schema, :abc}], 170 | fields: %{ 171 | def: %{ 172 | subfields: %{ 173 | ghi: %{ 174 | __name__: :ghi, 175 | __type__: nil, 176 | transformers: [], 177 | skip_if: nil, 178 | source: nil, 179 | subfields: %{}, 180 | validators: [] 181 | } 182 | } 183 | } 184 | } 185 | } = State.register(state, :field, {:ghi, %{}}) 186 | end 187 | 188 | test "given a matcher should add the matcher merged with the default to the state with the current cursor as the target", 189 | %{state_with_field: state} do 190 | assert %State{matchers: [%{rule: "some rule", target: target}]} = 191 | State.register(state, :matcher, "some rule") 192 | 193 | assert target == State.target(state) 194 | end 195 | end 196 | 197 | describe "update/3" do 198 | setup do 199 | state_with_schema = State.register(%State{}, :schema, {:abc, %{}}) 200 | state_with_row = State.register(state_with_schema, :row, {0, %{}}) 201 | state_with_field = State.register(state_with_row, :field, {:def, %{}}) 202 | state_with_nested_field = State.register(state_with_field, :field, {:ghi, %{}}) 203 | 204 | {:ok, 205 | state_with_schema: state_with_schema, 206 | state_with_row: state_with_row, 207 | state_with_field: state_with_field, 208 | state_with_nested_field: state_with_nested_field} 209 | end 210 | 211 | test "given a schema should update the existing schema", %{ 212 | state_with_schema: state 213 | } do 214 | assert %State{schema: %{some_field: 123}} = State.update(state, :schema, %{some_field: 123}) 215 | end 216 | 217 | test "given a row should update the row that the cursor is pointing at", %{ 218 | state_with_row: state 219 | } do 220 | assert %State{rows: [%{some_field: 123}]} = State.update(state, :row, %{some_field: 123}) 221 | end 222 | 223 | test "given a field should update the field the cursor is pointing at", %{ 224 | 
state_with_field: state 225 | } do 226 | assert %State{fields: %{def: %{some_field: 123}}} = 227 | State.update(state, :field, %{some_field: 123}) 228 | end 229 | 230 | test "given a field should update the field the cursor is pointing at (nested)", 231 | %{ 232 | state_with_nested_field: state 233 | } do 234 | assert %State{fields: %{def: %{subfields: %{ghi: %{some_field: 123}}}}} = 235 | State.update(state, :field, %{some_field: 123}) 236 | end 237 | end 238 | 239 | describe "get/2" do 240 | setup do 241 | state_with_schema = State.register(%State{}, :schema, {:abc, %{}}) 242 | state_with_row = State.register(state_with_schema, :row, {0, %{}}) 243 | state_with_field = State.register(state_with_row, :field, {:def, %{}}) 244 | state_with_nested_field = State.register(state_with_field, :field, {:ghi, %{}}) 245 | 246 | {:ok, 247 | state_with_schema: state_with_schema, 248 | state_with_row: state_with_row, 249 | state_with_field: state_with_field, 250 | state_with_nested_field: state_with_nested_field} 251 | end 252 | 253 | test "should return a row if requested", %{state_with_row: state_with_row} do 254 | assert State.get(state_with_row, :row) == Enum.at(state_with_row.rows, 0) 255 | end 256 | 257 | test "should return a field if requested", %{state_with_field: state_with_field} do 258 | assert State.get(state_with_field, :field) == Map.get(state_with_field.fields, :def) 259 | end 260 | 261 | test "should return a nested field if requested", %{ 262 | state_with_nested_field: state_with_nested_field 263 | } do 264 | assert State.get(state_with_nested_field, :field) == 265 | get_in(state_with_nested_field.fields, [:def, :subfields, :ghi]) 266 | end 267 | end 268 | end 269 | -------------------------------------------------------------------------------- /test/support/case.ex: -------------------------------------------------------------------------------- 1 | defmodule DataQuacker.Case do 2 | @moduledoc false 3 | 4 | use ExUnit.CaseTemplate 5 | 6 | using do 7 | quote 
do 8 | import DataQuacker.Case 9 | end 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /test/support/mock_file_manager.ex: -------------------------------------------------------------------------------- 1 | # defmodule DataQuacker.MockFileManager do 2 | # @behaviour DataQuacker.FileManager 3 | # 4 | # @impl true 5 | # def stream!(path, _modes \\ [], _line_or_bytes \\ :line) do 6 | # %Stream{} 7 | # end 8 | # end 9 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | Mox.defmock(DataQuacker.MockFileManager, for: DataQuacker.FileManager) 2 | 3 | Application.put_env(:data_quacker, :file_manager, DataQuacker.MockFileManager) 4 | 5 | ExUnit.start() 6 | --------------------------------------------------------------------------------