├── .envrc ├── .github └── workflows │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── benchmarks ├── 3.0.2.md ├── README.md ├── elm.json ├── lists-vs-arrays.md ├── parser.md ├── src │ ├── ConsVsArray.elm │ ├── DecodeVsParse.elm │ ├── HowManyLookups.elm │ └── SplitVsParse.elm └── when-is-array-worth-it.md ├── ci.sh ├── elm.json ├── nix ├── sources.json └── sources.nix ├── review ├── elm.json └── src │ └── ReviewConfig.elm ├── shell.nix ├── src └── Csv │ ├── Decode.elm │ ├── Encode.elm │ └── Parser.elm └── tests ├── Csv ├── DecodeTest.elm ├── EncodeTest.elm └── ParserTest.elm └── elm-verify-examples.json /.envrc: -------------------------------------------------------------------------------- 1 | use nix 2 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: "Test" 2 | on: 3 | pull_request: 4 | push: 5 | jobs: 6 | tests: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3.5.3 10 | - uses: cachix/install-nix-action@v22 11 | with: 12 | nix_path: nixpkgs=channel:nixos-23.05 13 | - uses: cachix/cachix-action@v12 14 | with: 15 | name: elm-csv 16 | authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}' 17 | extraPullNames: niv 18 | - run: nix-shell --pure --run 'true' 19 | - run: ./ci.sh 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /benchmarks/index.html 2 | /documentation.json 3 | /result 4 | /tests/VerifyExamples 5 | elm-stuff 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2021 Brian Hicks 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # elm-csv 2 | 3 | Decode CSV in the most boring way possible. 4 | Other CSV libraries have exciting, innovative APIs... not this one! 5 | Pretend you're writing a [JSON decoder](https://package.elm-lang.org/packages/elm/json/latest/), gimme your data, get on with your life. 6 | 7 | ```elm 8 | import Csv.Decode as Decode exposing (Decoder) 9 | 10 | 11 | decoder : Decoder ( Int, Int, Int ) 12 | decoder = 13 | Decode.map3 (\r g b -> ( r, g, b )) 14 | (Decode.column 0 Decode.int) 15 | (Decode.column 1 Decode.int) 16 | (Decode.column 2 Decode.int) 17 | 18 | 19 | csv : String 20 | csv = 21 | "0,128,128\r\n112,128,144" 22 | 23 | 24 | Decode.decodeCsv Decode.NoFieldNames decoder csv 25 | --> Ok 26 | --> [ ( 0, 128, 128 ) 27 | --> , ( 112, 128, 144 ) 28 | --> ] 29 | ``` 30 | 31 | However, in an effort to avoid a common problem with `elm/json` ("How do I decode a record with more than 8 fields?") this library also exposes a pipeline-style decoder ([inspired by `NoRedInk/elm-json-decode-pipeline`](https://package.elm-lang.org/packages/NoRedInk/elm-json-decode-pipeline/latest/)) for records: 32 | 33 | ```elm 34 | import Csv.Decode as Decode exposing (Decoder) 35 | 36 | 37 | type alias Pet = 38 | { id : Int 39 | , name : String 40 | , species : String 41 | , weight : Maybe Float 42 | } 43 | 44 | 45 | decoder : Decoder Pet 46 | decoder = 47 | Decode.into Pet 48 | |> Decode.pipeline (Decode.field "id" Decode.int) 49 | |> Decode.pipeline (Decode.field "name" Decode.string) 50 | |> Decode.pipeline (Decode.field "species" Decode.string) 51 | |> Decode.pipeline (Decode.field "weight" (Decode.blank Decode.float)) 52 | 53 | 54 | csv : String 55 | csv = 56 | "id,name,species,weight\r\n1,Atlas,cat,14.5\r\n2,Pippi,dog," 57 | 58 | 59 | Decode.decodeCsv Decode.FieldNamesFromFirstRow decoder csv 60 | --> Ok 61 | --> [ { id = 1, name = "Atlas", species = "cat", weight = Just 14.5 } 62 | --> , { id = 2, name = "Pippi", species = "dog", weight = Nothing } 63 | --> ] 64 | ``` 65 | 66 | ## FAQ 67 | 68 | ### Can this do TSVs too? What about European-style CSVs that use semicolon instead of comma? 69 | 70 | Yes to both! 71 | Use `decodeCustom`. 72 | It takes whatever field separator character you need (`'\t'` for TSVs, `';'` for semicolon-separated values) and handles both `\n` and `\r\n` row endings automatically. 73 | 74 | ### Aren't there like (*checks*) 8 other CSV libraries already? 75 | 76 | Yes, there are! 77 | While I appreciate the hard work that other people have put into those, there are a couple of problems: 78 | 79 | First, you needed to put together multiple libraries to successfully parse CSV. 80 | Before this package was published, you had to pick one package for parsing to `List (List String)` and another to decode from that into something you actually cared about. 81 | Props to those authors for making their hard work available, of course, but this situation bugs me! 82 | 83 | I don't want to have to pick different libraries for parsing and converting. 84 | I just want it to work like `elm/json` where I write a decoder, give the package a string, and handle a `Result`. 85 | This should not require so much thought! 86 | 87 | The second thing, and the one that prompted me to publish this package, is that none of the libraries available at the time implemented `andThen`. 88 | Sure, you can use a `Result` to do whatever you like, but there's not a good way to make a decoding decision for one field dependent on another.
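To make that concrete, here's a sketch of the kind of dependent decoding `andThen` enables (the field names and data shape here are made up):

```elm
import Csv.Decode as Decode exposing (Decoder)


weightInKilograms : Decoder Float
weightInKilograms =
    Decode.field "units" Decode.string
        |> Decode.andThen
            (\units ->
                case units of
                    "kg" ->
                        Decode.field "weight" Decode.float

                    "lbs" ->
                        Decode.field "weight" Decode.float
                            |> Decode.map (\pounds -> pounds * 0.45359237)

                    _ ->
                        Decode.fail ("I can't convert " ++ units ++ " to kilograms!")
            )
```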
89 | 90 | ## Contributing 91 | 92 | Hello! 93 | I'm so glad that you're interested in contributing to `elm-csv`! 94 | Just so you know, I consider this library "done". 95 | Unless something major changes in either the CSV standard or Elm, major changes are unlikely. 96 | If you want to make a case for new decoder functions (or whatever) being added to the package, feel free to do so (in an issue, not a PR!), but be aware the bar is fairly high for new inclusions. 97 | 98 | That said, I'll be publishing upgrades to track with new versions of Elm, and bug fixes as needed. 99 | I always welcome help with those, and with documentation improvements! 100 | 101 | Still here? 102 | Ok, let's get set up. 103 | This project uses [Nix](https://nixos.org/download.html) to manage versions (you just need a `nix` installation, not NixOS, so this will work on macOS.) 104 | Install that, then run `nix-shell` to get into a development environment. 105 | 106 | Things I'd appreciate help with: 107 | 108 | - **Testing the parser on many kinds of CSV and TSV data.** 109 | If you find that the parser has incorrectly interpreted some data you have, please open an issue. 110 | It would be very helpful if you could include a sample of the input that's giving you problems, the versions of the software used to produce the sample, and the locale settings on your computer. 111 | 112 | - **Feedback on speed.** 113 | Please let me know if you find that parsing/decoding has become a bottleneck in your application. 114 | Our parser is fairly quick (see `benchmarks/` in the source) but we can always go faster. 115 | 116 | - **Docs.** 117 | Always docs. 118 | Forever docs. 119 | 120 | ## Climate Action 121 | 122 | I want my open-source work to support projects addressing the climate crisis (for example, projects in clean energy, public transit, reforestation, or sustainable agriculture.) 123 | If you are working on such a project, and find a bug or missing feature in any of my libraries, **please let me know and I will treat your issue as high priority.** 124 | I'd also be happy to support such projects in other ways. 125 | In particular, I've worked with Elm for a long time and would be happy to advise on your implementation. 126 | 127 | ## License 128 | 129 | `elm-csv` is licensed under the BSD 3-Clause license, located at `LICENSE`. 130 | -------------------------------------------------------------------------------- /benchmarks/3.0.2.md: -------------------------------------------------------------------------------- 1 | # 3.0.2 Benchmarks 2 | 3 | I was benchmarking some other stuff in different browsers and got curious how `elm-csv` performed. 4 | (Setup: same as in [parser.md](./parser.md) except with only the 16-row case.) 5 | 6 | | Browser | Split | Parse | % Change | 7 | |----------------------|--------:|--------:|---------:| 8 | | Chrome 88.0.4324.192 | 147,967 | 88,501 | -40.19% | 9 | | Firefox 86.0 | 198,989 | 55,016 | -72.35% | 10 | | Safari 14.0.1 | 195,194 | 194,489 | -0.36% | 11 | 12 | This rings true based on the other benchmarks I'm doing! 13 | I expect that if I rewrote the parser to use `String.startsWith` instead of `String.slice`, we could maybe see no change in Chrome, a modest improvement in Firefox, and a massive loss in Safari. 14 | BUT this would maybe be offset by having to slice the string on every iteration *anyway*, so probably not worth doing.
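For reference, the two comparison styles in question look roughly like this (a sketch, not the real parser; `offset` stands in for the parser's real state):

```elm
-- today: slice one character out of the full source and compare it
sliceCheck : Int -> String -> Bool
sliceCheck offset source =
    String.slice offset (offset + 1) source == ","


-- the alternative: check the prefix of the remaining input, which only
-- works if we slice the source down on every iteration anyway
startsWithCheck : String -> Bool
startsWithCheck remaining =
    String.startsWith "," remaining
```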
15 | -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | These are the code and logs for benchmarking work on this library. 4 | The 1.0.1 release was not so fast. 5 | The 2.0.0 release will be much faster! 6 | 7 | Numbers are runs per second on Brian's MacBook Pro (2017, 3.1 GHz Quad-Core Intel Core i7 with 16GB memory) in Chrome (latest at time of writing.) 8 | 9 | - [benchmarking the parser](./parser.md) 10 | - [benchmarking lists vs arrays](./lists-vs-arrays.md) 11 | 12 | All the benchmark code lives under `src/` in this directory. 13 | -------------------------------------------------------------------------------- /benchmarks/elm.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "application", 3 | "source-directories": [ 4 | "src", 5 | "../src" 6 | ], 7 | "elm-version": "0.19.1", 8 | "dependencies": { 9 | "direct": { 10 | "elm/browser": "1.0.2", 11 | "elm/core": "1.0.5", 12 | "elm/html": "1.0.0", 13 | "elm/json": "1.1.3", 14 | "elm-explorations/benchmark": "1.0.2" 15 | }, 16 | "indirect": { 17 | "BrianHicks/elm-trend": "2.1.3", 18 | "elm/regex": "1.0.0", 19 | "elm/time": "1.0.0", 20 | "elm/url": "1.0.0", 21 | "elm/virtual-dom": "1.0.2", 22 | "mdgriffith/style-elements": "5.0.2", 23 | "robinheghan/murmur3": "1.0.0" 24 | } 25 | }, 26 | "test-dependencies": { 27 | "direct": {}, 28 | "indirect": {} 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /benchmarks/lists-vs-arrays.md: -------------------------------------------------------------------------------- 1 | # Is it faster to cons onto a list, append to an array, or assign into an array with preallocated size? 2 | 3 | The parser has to construct a lot of lists, and there may be a faster way. 4 | If there are wins here I want to find them, so I'm going to set up a new benchmark to measure. 5 | 6 | | Test | Speed | % Change vs. previous row | 7 | |--------------------------------------|-----------:|--------------------------:| 8 | | `(::)` directly | 17,996,283 | | 9 | | `(::)` into `List.reverse` | 9,425,834 | -90.93% | 10 | | `(::)` into `Array.fromList` | 5,745,229 | -64.06% | 11 | | `Array.push` | 3,737,409 | -53.72% | 12 | | `Array.set`, sharing initial `Array` | 2,029,485 | -84.16% | 13 | | `Array.set` without sharing | 1,884,629 | -7.69% | 14 | 15 | Well, that's that; guess I should keep using `List`! 16 | I probably could have predicted that reversing the list would roughly double the time, but I wouldn't have predicted that `set` on an `Array` initialized to the right length would be slower than `push`. 17 | It'd be the opposite if we were mutating the array directly, but I guess since we aren't it's faster to keep all the null values and create a new reference or something. 18 | -------------------------------------------------------------------------------- /benchmarks/parser.md: -------------------------------------------------------------------------------- 1 | # Benchmarking 2 | 3 | The benchmarking strategy for this library is to compare the full implementation against the simplest possible parser that does an acceptable job (it just splits rows by the row separator, and columns by the column separator. This wouldn't work for real data because of escaping issues.) 4 | 5 | The idea here is that if we can get the benchmarks for the real thing to be anything like the naive-but-fast thing, we're doing pretty well.
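Concretely, the naive baseline is the string-splitting parser from [`src/SplitVsParse.elm`](./src/SplitVsParse.elm):

```elm
stringSplittingParser : String -> List (List String)
stringSplittingParser =
    String.split "\u{000D}\n" >> List.map (String.split ",")
```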
6 | 7 | Numbers are runs per second on Brian's MacBook Pro (2017, 3.1 GHz Quad-Core Intel Core i7 with 16GB memory) in Chrome (latest at time of writing.) 8 | 9 | After this document was finalized, I had to make some changes to the parser that knock about 7% off the speed, but make the parser way more robust by being more accepting of different kinds of newlines. 10 | It's still about 25x faster than the 1.0.1 parser. 11 | 12 | ## Final parser speedup for this document (January 26, 2021) 13 | 14 | As of 00ac997c: 15 | 16 | | Size | Naive | Real | % Change | 17 | |---------|----------:|-----------:|----------:| 18 | | 0 rows | 3,077,611 | 19,439,037 | +531.63% | 19 | | 1 row | 1,747,415 | 1,853,376 | +6.06% | 20 | | 2 rows | 979,352 | 931,390 | -4.90% | 21 | | 4 rows | 535,850 | 469,160 | -12.45% | 22 | | 8 rows | 287,293 | 235,588 | -18.00% | 23 | | 16 rows | 146,172 | 118,980 | -18.60% | 24 | 25 | | Size | Before | After | Adjustment | After, Adjusted | % Change | 26 | |--------|----------:|----------:|-----------:|----------------:|------------:| 27 | | 1 row | 64,626 | 1,853,376 | -2.55% | 1,806,115 | +2,694.72% | 28 | | 2 rows | 32,939 | 931,390 | -4.25% | 891,806 | +2,607.45% | 29 | | 4 rows | 16,667 | 469,160 | -1.10% | 463,999 | +2,683.94% | 30 | | 8 rows | 8,261 | 235,588 | -0.35% | 234,763 | +2,741.82% | 31 | 32 | Overall, a 26x speedup! 33 | 34 | ## Don't duplicate slices (January 25, 2021) 35 | 36 | I was slicing off the first character of the string three times. 37 | That only needs to be done once. 38 | (Thanks again again, Andrey!) 39 | 40 | | Size | Naive | Real | % Change | 41 | |---------|----------:|-----------:|----------:| 42 | | 0 rows | 3,063,892 | 19,598,562 | +539.66% | 43 | | 1 row | 1,708,921 | 1,830,877 | +7.14% | 44 | | 2 rows | 983,645 | 922,945 | -1.67% | 45 | | 4 rows | 526,378 | 465,208 | -11.62% | 46 | | 8 rows | 279,097 | 465,208 | -17.43% | 47 | | 16 rows | 143,799 | 115,259 | -19.85% | 48 | | 32 rows | 73,565 | 58,084 | -21.04% | 49 | 50 | I also added a 16-row test because the function is fast enough now that doing so completes in a reasonable time, and a 32-row test just for this instance. 51 | 52 | ## Subtraction instead of comparison (January 25, 2021) 53 | 54 | If you've got integers, doing `(x - y) >= 0` is faster than `x >= y` because the compiler generates literally that code instead of using a comparator function. 55 | (Thanks again, Andrey!) 56 | 57 | | Size | Naive | Real | % Change | 58 | |--------|----------:|-----------:|----------:| 59 | | 0 rows | 2,982,748 | 18,602,065 | +523.66% | 60 | | 1 row | 1,630,630 | 1,323,911 | -18.11% | 61 | | 2 rows | 905,553 | 660,299 | -27.08% | 62 | | 4 rows | 498,260 | 333,485 | -33.07% | 63 | | 8 rows | 266,176 | 165,825 | -37.70% | 64 | 65 | For 8 rows: +10.54% to the new values (I changed monitor configs again between last run and now.) 66 | That means that 8 rows adjusted is like 183,302 times per second, a +6.13% improvement. 67 | 68 | ## Measuring progress on parser speedup (January 25, 2021) 69 | 70 | I think I've optimized the parser as much as I possibly can (or at least as much as I want to right now.) 71 | I've also made a bunch of bug fixes (and added the same inlining optimization for `;`-separated values, common in Europe.)
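For reference, the inlining optimization mentioned above (and in the "Inline" section below) amounts to this kind of change. A minimal sketch with made-up names, not the real parser code:

```elm
-- generic version: `separator` arrives as a value, so the compiled JS
-- compares via Elm's structural equality helper (_Utils_eq)
separatorCheck : String -> Int -> String -> Bool
separatorCheck separator offset source =
    String.slice offset (offset + 1) source == separator


-- inlined version: comparing against the literal lets the compiler
-- emit a direct === instead
commaCheck : Int -> String -> Bool
commaCheck offset source =
    String.slice offset (offset + 1) source == ","
```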
72 | 73 | Let's get a final number to see how far we've come: 74 | 75 | | Size | Naive | Real | % Change | 76 | |--------|----------:|-----------:|----------:| 77 | | 0 rows | 3,203,108 | 19,949,199 | +522.81% | 78 | | 1 row | 1,811,039 | 1,311,312 | -27.59% | 79 | | 2 rows | 998,932 | 681,188 | -31.81% | 80 | | 4 rows | 551,327 | 341,781 | -38.01% | 81 | | 8 rows | 297,552 | 172,720 | -41.95% | 82 | 83 | We were down like 100% before, so this is a big improvement! 84 | Let's make that percentage adjustment using the naive implementation and see where we're at: 85 | 86 | | Size | Before | After | Adjustment | After, Adjusted | % Change | 87 | |--------|--------|-----------|------------|-----------------|----------:| 88 | | 1 row | 64,626 | 1,311,312 | -6.28% | 1,228,961 | +1,801.65 | 89 | | 2 rows | 32,939 | 681,188 | -6.40% | 637,591 | +1,835.67 | 90 | | 4 rows | 16,667 | 341,781 | -4.01% | 328,076 | +1,868.42 | 91 | | 8 rows | 8,261 | 172,720 | -3.96% | 165,880 | +1,907.99 | 92 | 93 | So I'm going to claim it's **19x faster**, since most CSVs I've worked with are 8 rows or longer. 94 | 95 | What's that mean in characters per second? 96 | The benchmark has five fields per row, plus four commas, and a `\r\n` on every line except the end: `rows * 31 - 2`. 97 | For the benchmark with 8 rows, that's 246 characters. 98 | 99 | So previously, we could process that 8,261 times per second, or 2,032,206 characters per second. 100 | Now we can do it 165,880 (adjusted) times per second, or 40,806,480 characters per second! 101 | 102 | ## Inline "," and "\r\n" (January 25, 2021) 103 | 104 | I'm going to copy the whole parser function and make a version that tests literals instead of references. 105 | That should make the compiler use direct JS `===` instead of `_Utils_eq`. 106 | Maybe it makes things faster, at the cost of more generated code? 107 | 108 | | Size | Naive | Real | % Change | 109 | |--------|----------:|-----------:|----------:| 110 | | 0 rows | 3,152,392 | 20,269,870 | +543.00% | 111 | | 1 row | 1,818,376 | 1,327,727 | -26.98% | 112 | | 2 rows | 1,003,826 | 678,638 | -32.39% | 113 | | 4 rows | 552,580 | 342,057 | -38.10% | 114 | | 8 rows | 293,287 | 175,085 | -40.30% | 115 | 116 | Looks like the naive implementation differs more than I'd like (~6%, vs ~2% before) so I'm going to adjust the new numbers down by the percentage the naive numbers differ to arrive at a conclusion: 117 | 118 | | Size | Baseline | Inline | Adjustment | Inline, Adjusted | % Change | 119 | |--------|----------:|----------:|-----------:|-----------------:|---------:| 120 | | 1 row | 1,062,197 | 1,327,727 | -6.05% | 1,247,399 | +17.44% | 121 | | 2 rows | 524,645 | 678,638 | -6.35% | 635,544 | +21.14% | 122 | | 4 rows | 264,076 | 342,057 | -5.45% | 323,415 | +22.47% | 123 | | 8 rows | 132,918 | 175,085 | -5.16% | 166,050 | +24.93% | 124 | 125 | Ok, wow, that's probably worth keeping! 126 | 127 | ## Re-baseline once quoted field parser is finished (January 25, 2021) 128 | 129 | I finished the quoted field parser. 130 | It shouldn't have slowed down *too* much (it's doing the same comparison as before.) 
131 | But let's see: 132 | 133 | | Size | Naive | Real | % Change | 134 | |--------|----------:|-----------:|----------:| 135 | | 0 rows | 3,171,828 | 20,880,023 | +558.30% | 136 | | 1 row | 1,714,612 | 1,062,197 | -38.05% | 137 | | 2 rows | 944,000 | 524,645 | -44.45% | 138 | | 4 rows | 524,009 | 264,076 | -49.60% | 139 | | 8 rows | 278,901 | 132,918 | -52.34% | 140 | 141 | There's some weird noise here: the 0 row benchmark went down by a lot. 142 | I tried quieting my computer down (had some additional chat apps open and a different monitor configuration) but the difference persists. 143 | 144 | I don't see a big difference in the generated code, so I think this might be a fluke. 145 | Everything seems to be within a couple percentage points of where I want it to be, so I'm going to move ahead. 146 | 147 | ## Defer slicing until the decoding step (January 25, 2021) 148 | 149 | We won't necessarily use all the fields we parse, so we can just keep track of offsets instead of slicing directly. 150 | This will cause a little issue for quoted strings, but we could get around it by returning `(Int, Int, Bool)` where the bool indicates whether the field is quoted or not and replacing `""` with `"` after slicing if it is. 151 | 152 | Anyway, benchmarks: 153 | 154 | | Size | Naive | Real | % Change | 155 | |--------|----------:|-----------:|----------:| 156 | | 0 rows | 3,154,649 | 29,144,858 | +823.87% | 157 | | 1 row | 1,808,216 | 1,175,177 | -35.01% | 158 | | 2 rows | 997,407 | 576,939 | -42.16% | 159 | | 4 rows | 553,081 | 288,789 | -47.79% | 160 | | 8 rows | 290,929 | 142,389 | -51.06% | 161 | 162 | Let's see how that changed: 163 | 164 | | Size | Source Slicing | Int Tuples | % Change | 165 | |--------|---------------:|-----------:|----------:| 166 | | 1 row | 1,014,435 | 1,175,177 | +15.85% | 167 | | 2 rows | 519,795 | 576,939 | +10.99% | 168 | | 4 rows | 258,108 | 288,789 | +11.89% | 169 | 170 | Hmm! 171 | That's nothing to scoff at, but there's the complication above. 172 | And another thing, I bet people use all of the fields most of the time, so this may actually be adding allocations that we wouldn't have to be doing, just moving the string slicing to the decoder. 173 | For both reasons, I'm going to back this change out, finish the quoting implementation, and then benchmark decoding. 174 | 175 | ## Avoiding passing next source slice (January 24, 2021) 176 | 177 | We can definitely just keep the slice indexes around instead of truncating the string. 178 | Ok, let's slice into the full source! 179 | 180 | | Size | Naive | Real | % Change | 181 | |--------|----------:|-----------:|----------:| 182 | | 0 rows | 3,140,522 | 29,442,281 | +837.50% | 183 | | 1 row | 1,781,778 | 1,014,435 | -43.07% | 184 | | 2 rows | 993,161 | 519,795 | -47.66% | 185 | | 4 rows | 543,486 | 258,108 | -52.51% | 186 | | 8 rows | 290,018 | 129,659 | -55.29% | 187 | 188 | Well, that's an improvement from before, especially in the longer strings. 189 | From the last time I compared benchmarks to benchmarks: 190 | 191 | | Size | Hand-Rolled | Source Slicing | % Change | 192 | |--------|------------:|---------------:|----------:| 193 | | 1 row | 1,062,082 | 1,014,435 | -4.49% | 194 | | 2 rows | 500,770 | 519,795 | +3.80% | 195 | | 4 rows | 245,318 | 258,108 | +5.21% | 196 | 197 | Huh! 198 | Not quite the slam dunk I was expecting, but the effect seems to grow with longer CSVs, so I'll take it. 
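To make the shape of this change concrete, here's a minimal sketch (made-up names, much simpler than the real parser): instead of recursing on ever-shorter copies of the input, the loop carries offsets into the one original string and only allocates when slicing out a finished field.

```elm
splitFields : String -> List String
splitFields source =
    let
        length =
            String.length source

        go : Int -> Int -> List String -> List String
        go fieldStart offset acc =
            if offset >= length then
                -- the final field runs to the end of the source
                List.reverse (String.slice fieldStart length source :: acc)

            else if String.slice offset (offset + 1) source == "," then
                go (offset + 1) (offset + 1) (String.slice fieldStart offset source :: acc)

            else
                go fieldStart (offset + 1) acc
    in
    go 0 0 []
```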
199 | 200 | ## Avoiding tuple allocation (January 24, 2021) 201 | 202 | Andrey (w0rm) pointed out that `String.uncons` is allocating a `Maybe ( Char, String )` on every iteration. 203 | He suggested that getting the length of the string on each iteration ([constant time](https://jsbench.me/0dkkb3th3a/1)) or simply checking prefixes ([`slice` and `startsWith` should be equivalent](https://jsbench.me/mikkb4dm2s/1)) may be faster. 204 | Let's try! 205 | 206 | | Size | Naive | Real | % Change | 207 | |--------|----------:|-----------:|----------:| 208 | | 0 rows | 3,183,463 | 30,584,921 | +860.74% | 209 | | 1 row | 1,800,687 | 944,412 | -44.78% | 210 | | 2 rows | 998,618 | 463,075 | -53.63% | 211 | | 4 rows | 540,625 | 227,136 | -57.99% | 212 | | 8 rows | 296,402 | 111,209 | -62.48% | 213 | 214 | Hmm, this didn't actually make things faster! 215 | But I think this is required for further improvements, so I'm going to keep it (for now.) 216 | 217 | ## Hand-Rolled Parser (January 24, 2021) 218 | 219 | I can keep (mostly) the same API, but probably get a big speedup by rolling my own parser function on `String` directly. 220 | (nb. it's not done yet; I haven't done quoted values. 221 | However, the benchmark doesn't use quoted values and I added in a do-nothing branch so the compiled output includes the conditional check.) 222 | 223 | | Size | Naive | Real | % Change | 224 | |--------|----------:|-----------:|----------:| 225 | | 0 rows | 3,209,593 | 30,841,837 | +860.93% | 226 | | 1 row | 1,822,683 | 1,062,082 | -41.73% | 227 | | 2 rows | 988,819 | 500,770 | -49.36% | 228 | | 4 rows | 530,179 | 245,318 | -53.73% | 229 | | 8 rows | 240,949 | 124,999 | -57.04% | 230 | 231 | Much better! 232 | It's not always reasonable to compare speed across revisions (that's why we test against a known target), but the naive numbers look reasonably close. 233 | They're mostly within a couple percent of each other (except for 8 rows, which is ~16%), so I'm going to compare 1, 2, and 4: 234 | 235 | | Size | Bail Early | Hand-Rolled | % Change | 236 | |--------|-----------:|------------:|----------:| 237 | | 1 row | 67,496 | 1,062,082 | +1473.55% | 238 | | 2 rows | 33,915 | 500,770 | +1376.54% | 239 | | 4 rows | 17,209 | 245,318 | +1321.45% | 240 | 241 | Seems like roughly a 13x speedup. 242 | Works for me! 243 | 244 | ## Bail Early (January 23, 2021) 245 | 246 | I can make the 0 rows edge case way faster by checking for that instead of using the `elm/parser` machinery. 247 | 248 | | Size | Naive | Real | % Change | 249 | |--------|----------:|-----------:|----------:| 250 | | 0 rows | 3,099,488 | 42,873,148 | +1283.23% | 251 | | 1 row | 1,766,812 | 67,496 | -96.18% | 252 | | 2 rows | 967,385 | 33,915 | -96.49% | 253 | | 4 rows | 531,227 | 17,209 | -96.76% | 254 | | 8 rows | 285,386 | 8,491 | -97.02% | 255 | 256 | ... yeah, that works. 257 | 258 | ## Initial Measurement (January 23, 2021) 259 | 260 | | Size | Naive | Real | % Change | 261 | |--------|----------:|----------:|---------:| 262 | | 0 rows | 2,985,148 | 2,834,003 | -5.06% | 263 | | 1 row | 1,704,020 | 64,626 | -96.21% | 264 | | 2 rows | 939,422 | 32,939 | -96.49% | 265 | | 4 rows | 530,027 | 16,667 | -96.86% | 266 | | 8 rows | 286,299 | 8,261 | -97.11% | 267 | 268 | So around two orders of magnitude slower across the board. 269 | That's where we're starting!
270 | -------------------------------------------------------------------------------- /benchmarks/src/ConsVsArray.elm: -------------------------------------------------------------------------------- 1 | module ConsVsArray exposing (..) 2 | 3 | import Array 4 | import Benchmark exposing (benchmark, describe) 5 | import Benchmark.Runner exposing (BenchmarkProgram, program) 6 | 7 | 8 | main : BenchmarkProgram 9 | main = 10 | let 11 | targetSize = 12 | 10 13 | 14 | items = 15 | List.range 1 targetSize 16 | in 17 | describe "collection construction" 18 | [ benchmark "List (::)" (\_ -> List.foldl (::) [] items) 19 | , benchmark "List (::) |> List.reverse" (\_ -> List.foldl (::) [] items |> List.reverse) 20 | , benchmark "List (::) |> Array.fromList" (\_ -> List.foldl (::) [] items |> Array.fromList) 21 | , benchmark "Array.push" (\_ -> List.foldl Array.push Array.empty items) 22 | , let 23 | premade = 24 | Array.initialize targetSize identity 25 | in 26 | benchmark "Array.set (shared)" 27 | (\_ -> 28 | List.foldl 29 | (\item ( soFar, index ) -> 30 | ( Array.set index item soFar 31 | , index + 1 32 | ) 33 | ) 34 | ( premade, 0 ) 35 | items 36 | ) 37 | , benchmark "Array.set" 38 | (\_ -> 39 | List.foldl 40 | (\item ( soFar, index ) -> 41 | ( Array.set index item soFar 42 | , index + 1 43 | ) 44 | ) 45 | ( Array.initialize targetSize identity, 0 ) 46 | items 47 | ) 48 | ] 49 | |> program 50 | -------------------------------------------------------------------------------- /benchmarks/src/DecodeVsParse.elm: -------------------------------------------------------------------------------- 1 | module DecodeVsParse exposing (main) 2 | 3 | import Benchmark exposing (Benchmark, describe) 4 | import Benchmark.Runner exposing (BenchmarkProgram, program) 5 | import Csv.Parser as Parser 6 | import Json.Decode as Decode 7 | import Json.Encode as Encode 8 | 9 | 10 | encodeCsv : Int -> String 11 | encodeCsv howManyRows = 12 | List.range 0 (howManyRows - 1) 13 | |> List.map (\_ -> String.join "," (List.repeat 5 "a")) 14 | |> String.join "\u{000D}\n" 15 | 16 | 17 | {-| Encode the same shape as `encodeCsv` above: five "a" fields per row. 18 | -} 19 | encodeJson : Int -> String 20 | encodeJson howManyRows = 21 | List.range 0 (howManyRows - 1) 22 | |> Encode.list 23 | (\_ -> 24 | List.repeat 5 "a" 25 | |> Encode.list Encode.string 26 | ) 27 | |> Encode.encode 0 28 | 29 | 30 | main : BenchmarkProgram 31 | main = 32 | let 33 | config = 34 | { fieldSeparator = ',' } 35 | in 36 | [ 0, 1, 2, 4, 8, 16, 32 ] 37 | |> List.map 38 | (\size -> 39 | let 40 | csv = 41 | encodeCsv size 42 | 43 | json = 44 | encodeJson size 45 | in 46 | Benchmark.compare (String.fromInt size ++ " rows") 47 | ("Json.Decode.fromString" ++ " (" ++ String.fromInt (String.length json) ++ " bytes)") 48 | (\_ -> Decode.decodeString Decode.value json) 49 | ("Csv.Parser.parse" ++ " (" ++ String.fromInt (String.length csv) ++ " bytes)") 50 | (\_ -> Parser.parse config csv) 51 | ) 52 | |> describe "elm-csv" 53 | |> program 54 | -------------------------------------------------------------------------------- /benchmarks/src/HowManyLookups.elm: -------------------------------------------------------------------------------- 1 | module HowManyLookups exposing (..)
2 | 3 | import Array 4 | import Benchmark exposing (benchmark, describe) 5 | import Benchmark.Runner exposing (BenchmarkProgram, program) 6 | 7 | 8 | lookup : Int -> List a -> Maybe a 9 | lookup index items = 10 | List.head (List.drop index items) 11 | 12 | 13 | main : BenchmarkProgram 14 | main = 15 | [ 1, 2, 4, 8, 16, 32 ] 16 | |> List.map 17 | (\cap -> 18 | let 19 | sourceList = 20 | List.range 1 cap 21 | 22 | indexes = 23 | List.range 0 (cap - 1) 24 | in 25 | Benchmark.compare (String.fromInt cap ++ " items") 26 | "List" 27 | (\_ -> List.map (\index -> lookup index sourceList) indexes) 28 | "Array" 29 | (\_ -> 30 | let 31 | sourceArray = 32 | Array.fromList sourceList 33 | in 34 | List.map (\index -> Array.get index sourceArray) indexes 35 | ) 36 | ) 37 | |> describe "lookup scaling" 38 | |> program 39 | -------------------------------------------------------------------------------- /benchmarks/src/SplitVsParse.elm: -------------------------------------------------------------------------------- 1 | module SplitVsParse exposing (main) 2 | 3 | import Benchmark exposing (Benchmark, describe) 4 | import Benchmark.Runner exposing (BenchmarkProgram, program) 5 | import Csv.Parser as Parser 6 | 7 | 8 | stringSplittingParser : String -> List (List String) 9 | stringSplittingParser = 10 | String.split "\u{000D}\n" >> List.map (String.split ",") 11 | 12 | 13 | encodeCsv : Int -> String 14 | encodeCsv howManyRows = 15 | List.range 0 (howManyRows - 1) 16 | |> List.map (\_ -> String.join "," (List.repeat 5 "a")) 17 | |> String.join "\u{000D}\n" 18 | 19 | 20 | main : BenchmarkProgram 21 | main = 22 | let 23 | config = 24 | { fieldSeparator = ',' } 25 | in 26 | [ 0, 1, 2, 4, 8, 16 ] 27 | |> List.map 28 | (\size -> 29 | let 30 | csv = 31 | encodeCsv size 32 | in 33 | Benchmark.compare (String.fromInt size ++ " rows") 34 | "String.split" 35 | (\_ -> stringSplittingParser csv) 36 | "Csv.Parser.parse" 37 | (\_ -> Parser.parse config csv) 38 | ) 39 | |> describe "elm-csv" 40 | |> program 41 | -------------------------------------------------------------------------------- /benchmarks/when-is-array-worth-it.md: -------------------------------------------------------------------------------- 1 | # when is array worth it? 2 | 3 | I was planning to optimize the decoders by creating an array from the row list and then do all lookups on that instead of doing `List.drop index |> List.head`, thinking that lookups would be faster. 4 | Turns out, that may only be accurate above 8 items! 5 | 6 | For each size, we're benchmarking getting each item in the list individually. 7 | For arrays, we create the array inside the benchmark (but even if we don't do that, we only get a modest speedup at 8.) 8 | 9 | | Size | List | Array | % Change | 10 | |----------|-----------:|----------:|---------:| 11 | | 1 item | 10,862,604 | 4,803,727 | -55.78% | 12 | | 2 items | 7,160,442 | 3,740,035 | -47.77% | 13 | | 4 items | 3,966,711 | 2,560,377 | -35.45% | 14 | | 8 items | 1,939,120 | 1,613,071 | -16.81% | 15 | | 16 items | 869,669 | 940,014 | +8.09% | 16 | | 32 items | 329,673 | 422,594 | +28.19% | 17 | 18 | Of course, this effect will increase at larger sizes, but I haven't worked with a lot of CSVs that have more than 10 items. 19 | If someone complains about it, this is maybe worth implementing, but I don't think it's worth doing proactively (especially for the large cost to smaller CSVs.) 
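For concreteness, the change I was considering looks roughly like this (a sketch with made-up names, not the decoder's real code):

```elm
import Array exposing (Array)


-- what the decoder does today: walk the row list for every lookup
getColumn : Int -> List String -> Maybe String
getColumn index row =
    List.head (List.drop index row)


-- the idea: pay once to build an Array per row, then index into it
getColumnFromArray : Int -> Array String -> Maybe String
getColumnFromArray index row =
    Array.get index row
```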
20 | -------------------------------------------------------------------------------- /ci.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nix-shell 2 | #!nix-shell --pure -i bash 3 | set -euo pipefail 4 | 5 | group() { 6 | echo "::group::${1:-}" 7 | "${@}" 8 | echo 9 | echo "::endgroup::" 10 | } 11 | 12 | # tests 13 | group elm-verify-examples 14 | group elm-test 15 | 16 | # docs 17 | group elm make --docs=documentation.json 18 | 19 | # linting 20 | group elm-format --validate src 21 | group elm-review 22 | -------------------------------------------------------------------------------- /elm.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "package", 3 | "name": "BrianHicks/elm-csv", 4 | "summary": "Decode CSV in the most boring way possible.", 5 | "license": "BSD-3-Clause", 6 | "version": "4.0.1", 7 | "exposed-modules": [ 8 | "Csv.Decode", 9 | "Csv.Encode", 10 | "Csv.Parser" 11 | ], 12 | "elm-version": "0.19.0 <= v < 0.20.0", 13 | "dependencies": { 14 | "elm/core": "1.0.2 <= v < 2.0.0" 15 | }, 16 | "test-dependencies": { 17 | "elm-explorations/test": "2.0.0 <= v < 3.0.0", 18 | "rtfeldman/elm-hex": "1.0.0 <= v < 2.0.0" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /nix/sources.json: -------------------------------------------------------------------------------- 1 | { 2 | "niv": { 3 | "branch": "master", 4 | "description": "Easy dependency management for Nix projects", 5 | "homepage": "https://github.com/nmattia/niv", 6 | "owner": "nmattia", 7 | "repo": "niv", 8 | "rev": "914aba08a26cb10538b84d00d6cfb01c9776d80c", 9 | "sha256": "0gx316gc7prjay5b0cr13x4zc2pdbiwxkfkpjvrlb2rml80lm4pm", 10 | "type": "tarball", 11 | "url": "https://github.com/nmattia/niv/archive/914aba08a26cb10538b84d00d6cfb01c9776d80c.tar.gz", 12 | "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz" 13 | }, 14 | "nixpkgs": { 15 | "branch": "nixpkgs-unstable", 16 | "description": "Nix Packages collection", 17 | "homepage": "", 18 | "owner": "NixOS", 19 | "repo": "nixpkgs", 20 | "rev": "cd99c2b3c9f160cd004318e0697f90bbd5960825", 21 | "sha256": "0n2mzyjvxxhrjmallgwi6k1k8p4fi7rk1abhhibc7mgv8alvmmbi", 22 | "type": "tarball", 23 | "url": "https://github.com/NixOS/nixpkgs/archive/cd99c2b3c9f160cd004318e0697f90bbd5960825.tar.gz", 24 | "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /nix/sources.nix: -------------------------------------------------------------------------------- 1 | # This file has been generated by Niv. 2 | 3 | let 4 | 5 | # 6 | # The fetchers. fetch_<type> fetches specs of type <type>. 7 | # 8 | 9 | fetch_file = pkgs: name: spec: 10 | let 11 | name' = sanitizeName name + "-src"; 12 | in 13 | if spec.builtin or true then 14 | builtins_fetchurl { inherit (spec) url sha256; name = name'; } 15 | else 16 | pkgs.fetchurl { inherit (spec) url sha256; name = name'; }; 17 | 18 | fetch_tarball = pkgs: name: spec: 19 | let 20 | name' = sanitizeName name + "-src"; 21 | in 22 | if spec.builtin or true then 23 | builtins_fetchTarball { name = name'; inherit (spec) url sha256; } 24 | else 25 | pkgs.fetchzip { name = name'; inherit (spec) url sha256; }; 26 | 27 | fetch_git = name: spec: 28 | let 29 | ref = 30 | spec.ref or ( 31 | if spec ? branch then "refs/heads/${spec.branch}" else 32 | if spec ? tag then "refs/tags/${spec.tag}" else 33 | abort "In git source '${name}': Please specify `ref`, `tag` or `branch`!" 34 | ); 35 | submodules = spec.submodules or false; 36 | submoduleArg = 37 | let 38 | nixSupportsSubmodules = builtins.compareVersions builtins.nixVersion "2.4" >= 0; 39 | emptyArgWithWarning = 40 | if submodules 41 | then 42 | builtins.trace 43 | ( 44 | "The niv input \"${name}\" uses submodules " 45 | + "but your nix's (${builtins.nixVersion}) builtins.fetchGit " 46 | + "does not support them" 47 | ) 48 | { } 49 | else { }; 50 | in 51 | if nixSupportsSubmodules 52 | then { inherit submodules; } 53 | else emptyArgWithWarning; 54 | in 55 | builtins.fetchGit 56 | ({ url = spec.repo; inherit (spec) rev; inherit ref; } // submoduleArg); 57 | 58 | fetch_local = spec: spec.path; 59 | 60 | fetch_builtin-tarball = name: throw 61 | ''[${name}] The niv type "builtin-tarball" is deprecated. You should instead use `builtin = true`. 62 | $ niv modify ${name} -a type=tarball -a builtin=true''; 63 | 64 | fetch_builtin-url = name: throw 65 | ''[${name}] The niv type "builtin-url" will soon be deprecated. You should instead use `builtin = true`. 66 | $ niv modify ${name} -a type=file -a builtin=true''; 67 | 68 | # 69 | # Various helpers 70 | # 71 | 72 | # https://github.com/NixOS/nixpkgs/pull/83241/files#diff-c6f540a4f3bfa4b0e8b6bafd4cd54e8bR695 73 | sanitizeName = name: 74 | ( 75 | concatMapStrings (s: if builtins.isList s then "-" else s) 76 | ( 77 | builtins.split "[^[:alnum:]+._?=-]+" 78 | ((x: builtins.elemAt (builtins.match "\\.*(.*)" x) 0) name) 79 | ) 80 | ); 81 | 82 | # The set of packages used when specs are fetched using non-builtins. 83 | mkPkgs = sources: system: 84 | let 85 | sourcesNixpkgs = 86 | import (builtins_fetchTarball { inherit (sources.nixpkgs) url sha256; }) { inherit system; }; 87 | hasNixpkgsPath = builtins.any (x: x.prefix == "nixpkgs") builtins.nixPath; 88 | hasThisAsNixpkgsPath = <nixpkgs> == ./.; 89 | in 90 | if builtins.hasAttr "nixpkgs" sources 91 | then sourcesNixpkgs 92 | else if hasNixpkgsPath && ! hasThisAsNixpkgsPath then 93 | import <nixpkgs> { } 94 | else 95 | abort 96 | '' 97 | Please specify either <nixpkgs> (through -I or NIX_PATH=nixpkgs=...) or 98 | add a package called "nixpkgs" to your sources.json. 99 | ''; 100 | 101 | # The actual fetching function. 102 | fetch = pkgs: name: spec: 103 | 104 | if ! builtins.hasAttr "type" spec then 105 | abort "ERROR: niv spec ${name} does not have a 'type' attribute" 106 | else if spec.type == "file" then fetch_file pkgs name spec 107 | else if spec.type == "tarball" then fetch_tarball pkgs name spec 108 | else if spec.type == "git" then fetch_git name spec 109 | else if spec.type == "local" then fetch_local spec 110 | else if spec.type == "builtin-tarball" then fetch_builtin-tarball name 111 | else if spec.type == "builtin-url" then fetch_builtin-url name 112 | else 113 | abort "ERROR: niv spec ${name} has unknown type ${builtins.toJSON spec.type}"; 114 | 115 | # If the environment variable NIV_OVERRIDE_${name} is set, then use 116 | # the path directly as opposed to the fetched source. 117 | replace = name: drv: 118 | let 119 | saneName = stringAsChars (c: if (builtins.match "[a-zA-Z0-9]" c) == null then "_" else c) name; 120 | ersatz = builtins.getEnv "NIV_OVERRIDE_${saneName}"; 121 | in 122 | if ersatz == "" then drv else 123 | # this turns the string into an actual Nix path (for both absolute and 124 | # relative paths) 125 | if builtins.substring 0 1 ersatz == "/" then /. + ersatz else /.
+ builtins.getEnv "PWD" + "/${ersatz}"; 126 | 127 | # Ports of functions for older nix versions 128 | 129 | # a Nix version of mapAttrs if the built-in doesn't exist 130 | mapAttrs = builtins.mapAttrs or ( 131 | f: set: with builtins; 132 | listToAttrs (map (attr: { name = attr; value = f attr set.${attr}; }) (attrNames set)) 133 | ); 134 | 135 | # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295 136 | range = first: last: if first > last then [ ] else builtins.genList (n: first + n) (last - first + 1); 137 | 138 | # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L257 139 | stringToCharacters = s: map (p: builtins.substring p 1 s) (range 0 (builtins.stringLength s - 1)); 140 | 141 | # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269 142 | stringAsChars = f: s: concatStrings (map f (stringToCharacters s)); 143 | concatMapStrings = f: list: concatStrings (map f list); 144 | concatStrings = builtins.concatStringsSep ""; 145 | 146 | # https://github.com/NixOS/nixpkgs/blob/8a9f58a375c401b96da862d969f66429def1d118/lib/attrsets.nix#L331 147 | optionalAttrs = cond: as: if cond then as else { }; 148 | 149 | # fetchTarball version that is compatible between all the versions of Nix 150 | builtins_fetchTarball = { url, name ? null, sha256 }@attrs: 151 | let 152 | inherit (builtins) lessThan nixVersion fetchTarball; 153 | in 154 | if lessThan nixVersion "1.12" then 155 | fetchTarball ({ inherit url; } // (optionalAttrs (name != null) { inherit name; })) 156 | else 157 | fetchTarball attrs; 158 | 159 | # fetchurl version that is compatible between all the versions of Nix 160 | builtins_fetchurl = { url, name ? null, sha256 }@attrs: 161 | let 162 | inherit (builtins) lessThan nixVersion fetchurl; 163 | in 164 | if lessThan nixVersion "1.12" then 165 | fetchurl ({ inherit url; } // (optionalAttrs (name != null) { inherit name; })) 166 | else 167 | fetchurl attrs; 168 | 169 | # Create the final "sources" from the config 170 | mkSources = config: 171 | mapAttrs 172 | ( 173 | name: spec: 174 | if builtins.hasAttr "outPath" spec 175 | then 176 | abort 177 | "The values in sources.json should not have an 'outPath' attribute" 178 | else 179 | spec // { outPath = replace name (fetch config.pkgs name spec); } 180 | ) 181 | config.sources; 182 | 183 | # The "config" used by the fetchers 184 | mkConfig = 185 | { sourcesFile ? if builtins.pathExists ./sources.json then ./sources.json else null 186 | , sources ? if sourcesFile == null then { } else builtins.fromJSON (builtins.readFile sourcesFile) 187 | , system ? builtins.currentSystem 188 | , pkgs ? mkPkgs sources system 189 | }: rec { 190 | # The sources, i.e. the attribute set of spec name to spec 191 | inherit sources; 192 | 193 | # The "pkgs" (evaluated nixpkgs) to use for e.g. 
non-builtin fetchers 194 | inherit pkgs; 195 | }; 196 | 197 | in 198 | mkSources (mkConfig { }) // { __functor = _: settings: mkSources (mkConfig settings); } 199 | -------------------------------------------------------------------------------- /review/elm.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "application", 3 | "source-directories": [ 4 | "src" 5 | ], 6 | "elm-version": "0.19.1", 7 | "dependencies": { 8 | "direct": { 9 | "elm/core": "1.0.5", 10 | "elm/json": "1.1.3", 11 | "elm/project-metadata-utils": "1.0.2", 12 | "jfmengels/elm-review": "2.13.1", 13 | "jfmengels/elm-review-common": "1.3.3", 14 | "jfmengels/elm-review-unused": "1.1.30", 15 | "stil4m/elm-syntax": "7.2.9", 16 | "truqu/elm-review-noredundantconcat": "1.0.1", 17 | "truqu/elm-review-noredundantcons": "1.0.1" 18 | }, 19 | "indirect": { 20 | "elm/bytes": "1.0.8", 21 | "elm/html": "1.0.0", 22 | "elm/parser": "1.1.0", 23 | "elm/random": "1.0.0", 24 | "elm/time": "1.0.0", 25 | "elm/virtual-dom": "1.0.3", 26 | "elm-community/list-extra": "8.7.0", 27 | "elm-explorations/test": "2.1.1", 28 | "miniBill/elm-unicode": "1.0.3", 29 | "rtfeldman/elm-hex": "1.0.0", 30 | "stil4m/structured-writer": "1.0.3" 31 | } 32 | }, 33 | "test-dependencies": { 34 | "direct": { 35 | "elm-explorations/test": "2.1.1" 36 | }, 37 | "indirect": {} 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /review/src/ReviewConfig.elm: -------------------------------------------------------------------------------- 1 | module ReviewConfig exposing (config) 2 | 3 | import NoExposingEverything 4 | import NoImportingEverything 5 | import NoMissingTypeAnnotation 6 | import NoMissingTypeAnnotationInLetIn 7 | import NoMissingTypeExpose 8 | import NoRedundantConcat 9 | import NoRedundantCons 10 | import NoUnused.CustomTypeConstructorArgs 11 | import NoUnused.CustomTypeConstructors 12 | import NoUnused.Dependencies 13 | import NoUnused.Exports 14 | import NoUnused.Modules 15 | import NoUnused.Parameters 16 | import NoUnused.Patterns 17 | import NoUnused.Variables 18 | import Review.Rule as Rule exposing (Rule) 19 | 20 | 21 | config : List Rule 22 | config = 23 | [ -- elm-review-common 24 | NoExposingEverything.rule 25 | |> Rule.ignoreErrorsForDirectories [ "tests" ] 26 | , NoImportingEverything.rule [ "Test" ] 27 | |> Rule.ignoreErrorsForDirectories [ "tests/VerifyExamples" ] 28 | , NoMissingTypeAnnotation.rule 29 | , NoMissingTypeAnnotationInLetIn.rule 30 | , NoMissingTypeExpose.rule 31 | 32 | -- elm-review-unused 33 | , NoUnused.CustomTypeConstructors.rule [] 34 | , NoUnused.CustomTypeConstructorArgs.rule 35 | , NoUnused.Dependencies.rule 36 | , NoUnused.Exports.rule 37 | , NoUnused.Modules.rule 38 | , NoUnused.Parameters.rule 39 | , NoUnused.Patterns.rule 40 | , NoUnused.Variables.rule 41 | 42 | -- elm-review-noredundantconcat 43 | , NoRedundantConcat.rule 44 | 45 | -- elm-review-noredundantcons 46 | , NoRedundantCons.rule 47 | ] 48 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | { ... 
}: 2 | let 3 | sources = import ./nix/sources.nix; 4 | nixpkgs = import sources.nixpkgs { }; 5 | niv = import sources.niv { }; 6 | in with nixpkgs; 7 | stdenv.mkDerivation { 8 | name = "elm-csv"; 9 | buildInputs = [ 10 | niv.niv 11 | git 12 | 13 | # elm 14 | elmPackages.elm 15 | elmPackages.elm-format 16 | elmPackages.elm-json 17 | elmPackages.elm-review 18 | elmPackages.elm-test 19 | elmPackages.elm-verify-examples 20 | 21 | # to get remote stuff in CI 22 | cacert 23 | ]; 24 | } 25 | -------------------------------------------------------------------------------- /src/Csv/Decode.elm: -------------------------------------------------------------------------------- 1 | module Csv.Decode exposing 2 | ( Decoder, string, int, float, blank 3 | , column, field, optionalColumn, optionalField 4 | , FieldNames(..), decodeCsv, decodeCustom, Error(..), DecodingError(..), errorToString, Column(..), Problem(..) 5 | , map, map2, map3, into, pipeline 6 | , oneOf, andThen, succeed, fail, fromResult, fromMaybe, availableFields 7 | ) 8 | 9 | {-| Decode values from CSV. This package tries to be as 10 | unsurprising as possible, imitating [`elm/json`][elm-json] and 11 | [`NoRedInk/elm-json-decode-pipeline`][json-decode-pipeline] so that you can 12 | apply whatever you already know about JSON decoders to a different data format. 13 | 14 | [elm-json]: https://package.elm-lang.org/packages/elm/json/latest/ 15 | [json-decode-pipeline]: https://package.elm-lang.org/packages/NoRedInk/elm-json-decode-pipeline/latest/ 16 | 17 | 18 | ## A Crash Course on Constructing Decoders 19 | 20 | Say you have a CSV like this: 21 | 22 | ID,Name,Species 23 | 1,Atlas,cat 24 | 2,Axel,puffin 25 | 26 | You want to get some data out of it, so you're looking through these docs. 27 | Where do you begin? 28 | 29 | The first thing you need to know is that decoders are designed to fit together 30 | to match whatever data shapes are in your CSV. So to decode the ID (an `Int` in 31 | the "ID" field), you'd combine [`int`](#int) and [`field`](#field) like this: 32 | 33 | data : String 34 | data = 35 | -- \u{000D} is the carriage return 36 | "ID,Name,Species\u{000D}\n1,Atlas,cat\u{000D}\n2,Axel,puffin" 37 | 38 | decodeCsv FieldNamesFromFirstRow (field "ID" int) data 39 | --> Ok [ 1, 2 ] 40 | 41 | But this is probably not enough, so we'll need to combine a bunch of decoders 42 | together using [`into`](#into): 43 | 44 | decodeCsv FieldNamesFromFirstRow 45 | (into 46 | (\id name species -> 47 | { id = id 48 | , name = name 49 | , species = species 50 | } 51 | ) 52 | |> pipeline (field "ID" int) 53 | |> pipeline (field "Name" string) 54 | |> pipeline (field "Species" string) 55 | ) 56 | data 57 | --> Ok 58 | --> [ { id = 1, name = "Atlas", species = "cat" } 59 | --> , { id = 2, name = "Axel", species = "puffin" } 60 | --> ] 61 | 62 | You can decode as many things as you want by giving [`into`](#into) a function 63 | that takes more arguments.
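For example, here's a sketch that adds a fourth, made-up field ("Legs"). Since
this `data` has no such column, [`optionalField`](#optionalField) fills in
`Nothing`:

    decodeCsv FieldNamesFromFirstRow
        (into
            (\id name species legs ->
                ( id, name, ( species, legs ) )
            )
            |> pipeline (field "ID" int)
            |> pipeline (field "Name" string)
            |> pipeline (field "Species" string)
            |> pipeline (optionalField "Legs" int)
        )
        data
    --> Ok
    --> [ ( 1, "Atlas", ( "cat", Nothing ) )
    --> , ( 2, "Axel", ( "puffin", Nothing ) )
    --> ]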
64 | 65 | 66 | ## Basic Decoders 67 | 68 | @docs Decoder, string, int, float, blank 69 | 70 | 71 | ## Finding Values 72 | 73 | @docs column, field, optionalColumn, optionalField 74 | 75 | 76 | ## Running Decoders 77 | 78 | @docs FieldNames, decodeCsv, decodeCustom, Error, DecodingError, errorToString, Column, Problem 79 | 80 | 81 | ## Transforming Values 82 | 83 | @docs map, map2, map3, into, pipeline 84 | 85 | 86 | ## Fancy Decoding 87 | 88 | @docs oneOf, andThen, succeed, fail, fromResult, fromMaybe, availableFields 89 | 90 | -} 91 | 92 | import Csv.Parser as Parser 93 | import Dict exposing (Dict) 94 | 95 | 96 | 97 | -- BASIC DECODERS 98 | 99 | 100 | {-| A way to specify what kind of thing you want to decode into. For example, 101 | if you have a `Pet` data type, you'd want a `Decoder Pet`. 102 | -} 103 | type Decoder a 104 | = Decoder 105 | (Location 106 | -> ResolvedNames 107 | -> Int 108 | -> List String 109 | -> Result (List DecodingError) a 110 | ) 111 | 112 | 113 | type alias ResolvedNames = 114 | { names : Dict String Int 115 | , available : Bool 116 | } 117 | 118 | 119 | fromString : (String -> Result Problem a) -> Decoder a 120 | fromString convert = 121 | Decoder <| 122 | \location { names } rowNum row -> 123 | let 124 | error : Problem -> Result (List DecodingError) a 125 | error problem = 126 | Err 127 | [ FieldDecodingError 128 | { row = rowNum 129 | , column = locationToColumn names location 130 | , problem = problem 131 | } 132 | ] 133 | in 134 | case location of 135 | Column_ colNum -> 136 | case row |> List.drop colNum |> List.head of 137 | Just value -> 138 | case convert value of 139 | Ok converted -> 140 | Ok converted 141 | 142 | Err problem -> 143 | error problem 144 | 145 | Nothing -> 146 | error (ColumnNotFound colNum) 147 | 148 | Field_ name -> 149 | case Dict.get name names of 150 | Just colNum -> 151 | case row |> List.drop colNum |> List.head of 152 | Just value -> 153 | case convert value of 154 | Ok converted -> 155 | Ok converted 156 | 157 | Err problem -> 158 | error problem 159 | 160 | Nothing -> 161 | error (FieldNotFound name) 162 | 163 | Nothing -> 164 | Err [ FieldNotProvided name ] 165 | 166 | OnlyColumn_ -> 167 | case row of 168 | [] -> 169 | error (ColumnNotFound 0) 170 | 171 | [ only ] -> 172 | case convert only of 173 | Ok converted -> 174 | Ok converted 175 | 176 | Err problem -> 177 | error problem 178 | 179 | _ -> 180 | error (ExpectedOneColumn (List.length row)) 181 | 182 | 183 | {-| Decode a string. 184 | 185 | decodeCsv NoFieldNames string "a" --> Ok [ "a" ] 186 | 187 | Unless you specify otherwise (e.g. with [`field`](#field)) this will assume 188 | there is only one column in the CSV and try to decode that. 189 | 190 | decodeCsv NoFieldNames string "a,b" 191 | --> Err 192 | --> (DecodingErrors 193 | --> [ FieldDecodingError 194 | --> { row = 0 195 | --> , column = OnlyColumn 196 | --> , problem = ExpectedOneColumn 2 197 | --> } 198 | --> ] 199 | --> ) 200 | 201 | -} 202 | string : Decoder String 203 | string = 204 | fromString Ok 205 | 206 | 207 | {-| Decode an integer. 208 | 209 | decodeCsv NoFieldNames int "1" --> Ok [ 1 ] 210 | 211 | decodeCsv NoFieldNames int "volcano" 212 | --> Err 213 | --> (DecodingErrors 214 | --> [ FieldDecodingError 215 | --> { row = 0 216 | --> , column = OnlyColumn 217 | --> , problem = ExpectedInt "volcano" 218 | --> } 219 | --> ] 220 | --> ) 221 | 222 | Unless you specify otherwise (e.g. with [`field`](#field)) this will assume 223 | there is only one column in the CSV and try to decode that. 
224 | 225 | decodeCsv NoFieldNames int "1,2" 226 | --> Err 227 | --> (DecodingErrors 228 | --> [ FieldDecodingError 229 | --> { row = 0 230 | --> , column = OnlyColumn 231 | --> , problem = ExpectedOneColumn 2 232 | --> } 233 | --> ] 234 | --> ) 235 | 236 | -} 237 | int : Decoder Int 238 | int = 239 | fromString <| 240 | \value -> 241 | case String.toInt (String.trim value) of 242 | Just parsed -> 243 | Ok parsed 244 | 245 | Nothing -> 246 | Err (ExpectedInt value) 247 | 248 | 249 | {-| Decode a floating-point number. 250 | 251 | decodeCsv NoFieldNames float "3.14" --> Ok [ 3.14 ] 252 | 253 | decodeCsv NoFieldNames float "mimesis" 254 | --> Err 255 | --> (DecodingErrors 256 | --> [ FieldDecodingError 257 | --> { row = 0 258 | --> , column = OnlyColumn 259 | --> , problem = ExpectedFloat "mimesis" 260 | --> } 261 | --> ] 262 | --> ) 263 | 264 | Unless you specify otherwise (e.g. with [`field`](#field)) this will assume 265 | there is only one column in the CSV and try to decode that. 266 | 267 | decodeCsv NoFieldNames float "1.0,2.0" 268 | --> Err 269 | --> (DecodingErrors 270 | --> [ FieldDecodingError 271 | --> { row = 0 272 | --> , column = OnlyColumn 273 | --> , problem = ExpectedOneColumn 2 274 | --> } 275 | --> ] 276 | --> ) 277 | 278 | -} 279 | float : Decoder Float 280 | float = 281 | fromString <| 282 | \value -> 283 | case String.toFloat (String.trim value) of 284 | Just parsed -> 285 | Ok parsed 286 | 287 | Nothing -> 288 | Err (ExpectedFloat value) 289 | 290 | 291 | {-| Handle blank fields by turning them into `Maybe`s. We consider a field 292 | to be blank if it's empty or consists solely of whitespace characters. 293 | 294 | decodeCsv NoFieldNames (blank int) "\r\n1" 295 | --> Ok [ Nothing, Just 1 ] 296 | 297 | -} 298 | blank : Decoder a -> Decoder (Maybe a) 299 | blank decoder = 300 | andThen 301 | (\maybeBlank -> 302 | if String.isEmpty (String.trim maybeBlank) then 303 | succeed Nothing 304 | 305 | else 306 | map Just decoder 307 | ) 308 | string 309 | 310 | 311 | 312 | -- LOCATIONS 313 | 314 | 315 | type Location 316 | = Column_ Int 317 | | Field_ String 318 | | OnlyColumn_ 319 | 320 | 321 | {-| Parse a value at a numbered column, starting from 0. 322 | 323 | decodeCsv NoFieldNames (column 1 string) "a,b,c" --> Ok [ "b" ] 324 | 325 | decodeCsv NoFieldNames (column 100 float) "3.14" 326 | --> Err 327 | --> (DecodingErrors 328 | --> [ FieldDecodingError 329 | --> { row = 0 330 | --> , column = Column 100 331 | --> , problem = ColumnNotFound 100 332 | --> } 333 | --> ] 334 | --> ) 335 | 336 | -} 337 | column : Int -> Decoder a -> Decoder a 338 | column col (Decoder decoder) = 339 | Decoder (\_ fieldNames row -> decoder (Column_ col) fieldNames row) 340 | 341 | 342 | {-| Like `column`, parse a value at a numbered column. The parsing succeeds even if the column is missing. 343 | 344 | decodeCsv 345 | NoFieldNames 346 | (optionalColumn 1 string) 347 | "Pie\r\nApple,Argentina" 348 | --> Ok [ Nothing, Just "Argentina" ] 349 | 350 | -} 351 | optionalColumn : Int -> Decoder a -> Decoder (Maybe a) 352 | optionalColumn col (Decoder decoder) = 353 | Decoder 354 | (\_ fieldNames rowNum row -> 355 | if col < List.length row then 356 | Result.map Just (decoder (Column_ col) fieldNames rowNum row) 357 | 358 | else 359 | Ok Nothing 360 | ) 361 | 362 | 363 | {-| Parse a value at a named column. 
There are a number of ways to provide 364 | these names, see [`FieldNames`](#FieldNames) 365 | 366 | decodeCsv 367 | FieldNamesFromFirstRow 368 | (field "Country" string) 369 | "Country\r\nArgentina" 370 | --> Ok [ "Argentina" ] 371 | 372 | -} 373 | field : String -> Decoder a -> Decoder a 374 | field name (Decoder decoder) = 375 | Decoder (\_ fieldNames row -> decoder (Field_ name) fieldNames row) 376 | 377 | 378 | {-| Like `field`, parse a value at a named column. The parsing succeeds even if the column is missing. 379 | 380 | decodeCsv 381 | FieldNamesFromFirstRow 382 | (optionalField "Country" string) 383 | "Country\r\nArgentina" 384 | --> Ok [ Just "Argentina" ] 385 | 386 | 387 | decodeCsv 388 | FieldNamesFromFirstRow 389 | (optionalField "Country" string) 390 | "Pie\r\nApple" 391 | --> Ok [ Nothing ] 392 | 393 | -} 394 | optionalField : String -> Decoder a -> Decoder (Maybe a) 395 | optionalField name (Decoder decoder) = 396 | Decoder 397 | (\_ fieldNames rowNum row -> 398 | if Dict.member name fieldNames.names then 399 | Result.map Just (decoder (Field_ name) fieldNames rowNum row) 400 | 401 | else 402 | Ok Nothing 403 | ) 404 | 405 | 406 | {-| Returns all available field names. The behavior depends on your configuration: 407 | 408 | - `NoFieldNames`: The decoder fails. 409 | - `CustomFieldNames`: Decodes to the provided list. 410 | - `FieldNamesFromFirstRow`: Returns the first row of the CSV. 411 | 412 | -} 413 | availableFields : Decoder (List String) 414 | availableFields = 415 | Decoder 416 | (\_ fieldNames _ _ -> 417 | if fieldNames.available then 418 | Ok 419 | (Dict.toList fieldNames.names 420 | |> List.sortBy Tuple.second 421 | |> List.map Tuple.first 422 | ) 423 | 424 | else 425 | Err [ NoFieldNamesProvided ] 426 | ) 427 | 428 | 429 | 430 | -- RUN DECODERS 431 | 432 | 433 | {-| Where do we get names for use with [`field`](#field)? 434 | 435 | - `NoFieldNames`: don't get field names at all. [`field`](#field) will 436 | always fail. 437 | - `CustomFieldNames`: use the provided field names in order (so `["Id", "Name"]` 438 | will mean that "Id" is in column 0 and "Name" is in column 1.) 439 | - `FieldNamesFromFirstRow`: use the first row of the CSV as the source of 440 | field names. 441 | 442 | -} 443 | type FieldNames 444 | = NoFieldNames 445 | | CustomFieldNames (List String) 446 | | FieldNamesFromFirstRow 447 | 448 | 449 | getFieldNames : FieldNames -> List (List String) -> Result Error ( ResolvedNames, Int, List (List String) ) 450 | getFieldNames headers rows = 451 | let 452 | fromList : List String -> Dict String Int 453 | fromList names = 454 | names 455 | |> List.foldl 456 | (\name ( soFar, i ) -> 457 | ( Dict.insert name i soFar 458 | , i + 1 459 | ) 460 | ) 461 | ( Dict.empty, 0 ) 462 | |> Tuple.first 463 | in 464 | case headers of 465 | NoFieldNames -> 466 | Ok ( { names = Dict.empty, available = False }, 0, rows ) 467 | 468 | CustomFieldNames names -> 469 | Ok ( { names = fromList names, available = True }, 0, rows ) 470 | 471 | FieldNamesFromFirstRow -> 472 | case rows of 473 | [] -> 474 | Err NoFieldNamesOnFirstRow 475 | 476 | first :: rest -> 477 | Ok ( { names = fromList (List.map String.trim first), available = True }, 1, rest ) 478 | 479 | 480 | {-| Convert a CSV string into some type you care about using the 481 | [`Decoder`](#Decoder)s in this module! 
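For example, picking the second column out of each row:

    decodeCsv NoFieldNames (column 1 string) "a,b\r\nc,d"
    --> Ok [ "b", "d" ]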
482 | -} 483 | decodeCsv : FieldNames -> Decoder a -> String -> Result Error (List a) 484 | decodeCsv = 485 | decodeCustom { fieldSeparator = ',' } 486 | 487 | 488 | {-| Convert something shaped roughly like a CSV. For example, to decode 489 | a TSV (_tab_-separated values) string: 490 | 491 | decodeCustom { fieldSeparator = '\t' } 492 | NoFieldNames 493 | (map2 Tuple.pair 494 | (column 0 int) 495 | (column 1 string) 496 | ) 497 | "1\tBrian\n2\tAtlas" 498 | --> Ok [ ( 1, "Brian" ), ( 2, "Atlas" ) ] 499 | 500 | -} 501 | decodeCustom : { fieldSeparator : Char } -> FieldNames -> Decoder a -> String -> Result Error (List a) 502 | decodeCustom config fieldNames decoder source = 503 | Parser.parse config source 504 | |> Result.mapError ParsingError 505 | |> Result.andThen (applyDecoder fieldNames decoder) 506 | 507 | 508 | applyDecoder : FieldNames -> Decoder a -> List (List String) -> Result Error (List a) 509 | applyDecoder fieldNames (Decoder decode) allRows = 510 | let 511 | defaultLocation : Location 512 | defaultLocation = 513 | OnlyColumn_ 514 | in 515 | Result.andThen 516 | (\( resolvedNames, firstRowNumber, rows ) -> 517 | rows 518 | |> List.foldl 519 | (\row ( soFar, rowNum ) -> 520 | ( case decode defaultLocation resolvedNames rowNum row of 521 | Ok val -> 522 | case soFar of 523 | Ok values -> 524 | Ok (val :: values) 525 | 526 | Err errs -> 527 | Err errs 528 | 529 | Err err -> 530 | case soFar of 531 | Ok _ -> 532 | Err [ err ] 533 | 534 | Err errs -> 535 | Err (err :: errs) 536 | , rowNum + 1 537 | ) 538 | ) 539 | ( Ok [], firstRowNumber ) 540 | |> Tuple.first 541 | |> Result.map List.reverse 542 | |> Result.mapError (DecodingErrors << List.concat << List.reverse) 543 | ) 544 | (getFieldNames fieldNames allRows) 545 | 546 | 547 | {-| Sometimes we cannot decode every row in a CSV. This is how we tell 548 | you what went wrong. If you need to present this to someone, you can get a 549 | human-readable version with [`errorToString`](#errorToString) 550 | 551 | Some more detail: 552 | 553 | - `ParsingError`: there was a problem parsing the CSV into rows and 554 | columns. All these errors have to do with quoting issues. Check that 555 | any quoted fields are closed and that quotes are escaped. 556 | - `NoFieldNamesOnFirstRow`: we tried to get the field names from the first 557 | row (using [`FieldNames`](#FieldNames)) but couldn't find any, probably 558 | because the input was blank. 559 | - `DecodingErrors`: we couldn't decode a value using the specified 560 | decoder. See [`DecodingError`](#DecodingError) for more details. 561 | 562 | -} 563 | type Error 564 | = ParsingError Parser.Problem 565 | | NoFieldNamesOnFirstRow 566 | | DecodingErrors (List DecodingError) 567 | 568 | 569 | {-| Errors when decoding can either be: 570 | 571 | - Focused on decoding a single field (`FieldDecodingError`), in which case there 572 | is a specific [`Problem`](#Problem) in a specific location. 573 | - A result of a [`oneOf`](#oneOf) where all branches failed (`OneOfDecodingError`). 574 | - A problem with the header row or configuration where a column is simply 575 | missing (`FieldNotProvided`). 576 | - Calling `availableFields` when `NoFieldNames` was passed. 577 | 578 | -} 579 | type DecodingError 580 | = FieldDecodingError { row : Int, column : Column, problem : Problem } 581 | | OneOfDecodingError Int (List DecodingError) 582 | | FieldNotProvided String 583 | | NoFieldNamesProvided 584 | 585 | 586 | {-| Where did the problem happen? 
587 | 588 | - `Column`: at the given column number 589 | - `Field`: at the given named column (with optional column number if we were 590 | able to look up what column we _should_ have found.) 591 | - `OnlyColumn`: at the only column in the row 592 | 593 | -} 594 | type Column 595 | = Column Int 596 | | Field String (Maybe Int) 597 | | OnlyColumn 598 | 599 | 600 | locationToColumn : Dict String Int -> Location -> Column 601 | locationToColumn fieldNames location = 602 | case location of 603 | Column_ i -> 604 | Column i 605 | 606 | Field_ name -> 607 | Field name (Dict.get name fieldNames) 608 | 609 | OnlyColumn_ -> 610 | OnlyColumn 611 | 612 | 613 | {-| Things that can go wrong while decoding: 614 | 615 | - `ColumnNotFound Int` and `FieldNotFound String`: we looked for the 616 | specified column, but couldn't find it. The argument specifies where we 617 | tried to look. 618 | - `ExpectedOneColumn Int`: basic decoders like [`string`](#string) and 619 | [`int`](#int) expect to find a single column per row. If there are multiple 620 | columns, and you don't specify which to use with [`column`](#column) 621 | or [`field`](#field), you'll get this error. The argument says how many 622 | columns we found. 623 | - `ExpectedInt String` and `ExpectedFloat String`: we failed to parse a 624 | string into a number. The argument specifies the string we got. 625 | - `Failure`: we got a custom failure message from [`fail`](#fail). 626 | 627 | -} 628 | type Problem 629 | = ColumnNotFound Int 630 | | FieldNotFound String 631 | | ExpectedOneColumn Int 632 | | ExpectedInt String 633 | | ExpectedFloat String 634 | | Failure String 635 | 636 | 637 | {-| Produce a human-readable version of an [`Error`](#Error)?! 638 | -} 639 | errorToString : Error -> String 640 | errorToString error = 641 | case error of 642 | ParsingError (Parser.SourceEndedWithoutClosingQuote row) -> 643 | "The source ended on row " ++ String.fromInt row ++ " in a quoted field without a closing quote." 644 | 645 | ParsingError (Parser.AdditionalCharactersAfterClosingQuote row) -> 646 | "On row " ++ String.fromInt row ++ " in the source, there were additional characters in a field after a closing quote." 647 | 648 | NoFieldNamesOnFirstRow -> 649 | "I expected to see field names on the first row, but there were none." 650 | 651 | DecodingErrors errs -> 652 | let 653 | problemString : Problem -> String 654 | problemString problem = 655 | case problem of 656 | ColumnNotFound i -> 657 | "I couldn't find column #" ++ String.fromInt i ++ "." 658 | 659 | FieldNotFound name -> 660 | "I couldn't find the `" ++ name ++ "` column." 661 | 662 | ExpectedOneColumn howMany -> 663 | "I expected exactly one column, but there were " ++ String.fromInt howMany ++ "." 664 | 665 | ExpectedInt notInt -> 666 | "I could not parse an int from `" ++ notInt ++ "`." 667 | 668 | ExpectedFloat notFloat -> 669 | "I could not parse a float from `" ++ notFloat ++ "`." 
670 | 671 | Failure custom -> 672 | custom 673 | 674 | columnString : { b | column : Column } -> String 675 | columnString err = 676 | case err.column of 677 | Column col -> 678 | "column " ++ String.fromInt col 679 | 680 | Field name Nothing -> 681 | "in the `" ++ name ++ "` field" 682 | 683 | Field name (Just col) -> 684 | "in the `" ++ name ++ "` field (column " ++ String.fromInt col ++ ")" 685 | 686 | OnlyColumn -> 687 | "column 0 (the only column present)" 688 | 689 | rowString : { a | startRow : Int, endRow : Int } -> String 690 | rowString loc = 691 | case loc.endRow - loc.startRow of 692 | 0 -> 693 | "row " ++ String.fromInt loc.startRow 694 | 695 | 1 -> 696 | "rows " ++ String.fromInt loc.startRow ++ " and " ++ String.fromInt loc.endRow 697 | 698 | _ -> 699 | "rows " ++ String.fromInt loc.startRow ++ "–" ++ String.fromInt loc.endRow 700 | 701 | errString : DecodingError -> String 702 | errString err = 703 | case err of 704 | FieldDecodingError fde -> 705 | columnString fde 706 | ++ ": " 707 | ++ problemString fde.problem 708 | 709 | OneOfDecodingError _ oodes -> 710 | "all of the following decoders failed, but at least one must succeed:\n" 711 | ++ String.join "\n" 712 | (List.indexedMap 713 | (\i e -> 714 | " (" ++ String.fromInt (i + 1) ++ ") " ++ errString e 715 | ) 716 | oodes 717 | ) 718 | 719 | FieldNotProvided name -> 720 | "field " ++ name ++ " was not provided" 721 | 722 | NoFieldNamesProvided -> 723 | "Asked for available fields, but none were provided" 724 | 725 | topLevelErrString : { startRow : Int, endRow : Int, error : DecodingError } -> String 726 | topLevelErrString err = 727 | (case err.error of 728 | FieldDecodingError _ -> 729 | "There was a problem on " ++ rowString err ++ ", " 730 | 731 | OneOfDecodingError _ _ -> 732 | "There was a problem on " ++ rowString err ++ " - " 733 | 734 | FieldNotProvided _ -> 735 | "There was a problem in the header: " 736 | 737 | NoFieldNamesProvided -> 738 | "" 739 | ) 740 | ++ errString err.error 741 | 742 | isContiguous : DecodingError -> DecodingError -> Bool 743 | isContiguous errA errB = 744 | case ( errA, errB ) of 745 | ( FieldDecodingError a, FieldDecodingError b ) -> 746 | a.problem == b.problem && a.row + 1 == b.row && a.column == b.column 747 | 748 | ( OneOfDecodingError aRow aList, OneOfDecodingError bRow bList ) -> 749 | aRow + 1 == bRow && List.length aList == List.length bList && List.all identity (List.map2 isContiguous aList bList) 750 | 751 | _ -> 752 | errA == errB 753 | 754 | getRow : DecodingError -> Int 755 | getRow decErr = 756 | case decErr of 757 | FieldDecodingError e -> 758 | e.row 759 | 760 | OneOfDecodingError row _ -> 761 | row 762 | 763 | FieldNotProvided _ -> 764 | 0 765 | 766 | NoFieldNamesProvided -> 767 | 0 768 | 769 | dedupeHelp : 770 | List { startRow : Int, endRow : Int, error : DecodingError } 771 | -> List DecodingError 772 | -> List DecodingError 773 | -> List { startRow : Int, endRow : Int, error : DecodingError } 774 | dedupeHelp soFar prevGroup errors = 775 | case errors of 776 | [] -> 777 | case prevGroup of 778 | [] -> 779 | List.reverse soFar 780 | 781 | head :: tail -> 782 | List.reverse ({ startRow = List.reverse tail |> List.head |> Maybe.withDefault head |> getRow, endRow = getRow head, error = head } :: soFar) 783 | 784 | err :: rest -> 785 | case prevGroup of 786 | [] -> 787 | dedupeHelp soFar (err :: prevGroup) rest 788 | 789 | head :: tail -> 790 | if isContiguous head err then 791 | dedupeHelp soFar (err :: prevGroup) rest 792 | 793 | else 794 | dedupeHelp ({ startRow = 
List.reverse tail |> List.head |> Maybe.withDefault head |> getRow, endRow = getRow head, error = head } :: soFar) [ err ] rest 795 | 796 | dedupeErrs : List DecodingError -> List { startRow : Int, endRow : Int, error : DecodingError } 797 | dedupeErrs = 798 | List.sortBy 799 | (\err -> 800 | case err of 801 | FieldDecodingError { problem, row } -> 802 | case problem of 803 | ColumnNotFound _ -> 804 | ( 1, "", row ) 805 | 806 | FieldNotFound name -> 807 | ( 2, name, row ) 808 | 809 | ExpectedOneColumn howMany -> 810 | ( 3, String.fromInt howMany, row ) 811 | 812 | ExpectedInt notInt -> 813 | ( 4, notInt, row ) 814 | 815 | ExpectedFloat notFloat -> 816 | ( 5, notFloat, row ) 817 | 818 | Failure custom -> 819 | ( 6, custom, row ) 820 | 821 | OneOfDecodingError row list -> 822 | -- This isn't completely foolproof: if there is more than one 823 | -- OneOfDecodingError per row with the same number of branches, 824 | -- this will fail to group them. 825 | ( 7, String.fromInt (List.length list), row ) 826 | 827 | FieldNotProvided name -> 828 | ( 8, name, 0 ) 829 | 830 | NoFieldNamesProvided -> 831 | ( 9, "", 0 ) 832 | ) 833 | >> dedupeHelp [] [] 834 | >> List.sortBy (\{ startRow } -> startRow) 835 | in 836 | case dedupeErrs errs of 837 | [] -> 838 | "Something went wrong, but I got a blank error list so I don't know what it was. Please open an issue!" 839 | 840 | [ only ] -> 841 | topLevelErrString only 842 | 843 | multiple -> 844 | "I saw " 845 | ++ String.fromInt (List.length multiple) 846 | ++ " problems while decoding this CSV:\n\n" 847 | ++ String.join "\n\n" (List.map topLevelErrString multiple) 848 | 849 | 850 | 851 | -- MAPPING 852 | 853 | 854 | {-| Transform a decoded value. 855 | 856 | decodeCsv NoFieldNames (map (\i -> i * 2) int) "15" 857 | --> Ok [ 30 ] 858 | 859 | decodeCsv NoFieldNames (map String.reverse string) "slap" 860 | --> Ok [ "pals" ] 861 | 862 | -} 863 | map : (from -> to) -> Decoder from -> Decoder to 864 | map transform (Decoder decoder) = 865 | Decoder (\location fieldNames rowNum row -> decoder location fieldNames rowNum row |> Result.map transform) 866 | 867 | 868 | {-| Combine two decoders to make something else. 869 | 870 | decodeCsv NoFieldNames 871 | (map2 Tuple.pair 872 | (column 0 int) 873 | (column 1 string) 874 | ) 875 | "1,Atlas" 876 | --> Ok [ (1, "Atlas") ] 877 | 878 | -} 879 | map2 : (a -> b -> c) -> Decoder a -> Decoder b -> Decoder c 880 | map2 transform (Decoder decodeA) (Decoder decodeB) = 881 | Decoder 882 | (\location fieldNames rowNum row -> 883 | case ( decodeA location fieldNames rowNum row, decodeB location fieldNames rowNum row ) of 884 | ( Ok a, Ok b ) -> 885 | Ok (transform a b) 886 | 887 | ( Err a, Err b ) -> 888 | Err (a ++ b) 889 | 890 | ( Err a, _ ) -> 891 | Err a 892 | 893 | ( _, Err b ) -> 894 | Err b 895 | ) 896 | 897 | 898 | {-| Like [`map2`](#map2), but with three decoders. `map4` and beyond don't 899 | exist in this package. Use [`into`](#into) to decode records instead!
900 | 901 | decodeCsv NoFieldNames 902 | (map3 (\r g b -> (r, g, b)) 903 | (column 0 int) 904 | (column 1 int) 905 | (column 2 int) 906 | ) 907 | "255,255,0" 908 | --> Ok [ (255, 255, 0) ] 909 | 910 | -} 911 | map3 : (a -> b -> c -> d) -> Decoder a -> Decoder b -> Decoder c -> Decoder d 912 | map3 transform (Decoder decodeA) (Decoder decodeB) (Decoder decodeC) = 913 | Decoder 914 | (\location fieldNames rowNum row -> 915 | case 916 | ( decodeA location fieldNames rowNum row 917 | , decodeB location fieldNames rowNum row 918 | , decodeC location fieldNames rowNum row 919 | ) 920 | of 921 | ( Ok a, Ok b, Ok c ) -> 922 | Ok (transform a b c) 923 | 924 | ( Err a, Err b, Err c ) -> 925 | Err (a ++ b ++ c) 926 | 927 | ( Err a, Err b, _ ) -> 928 | Err (a ++ b) 929 | 930 | ( _, Err b, Err c ) -> 931 | Err (b ++ c) 932 | 933 | ( Err a, _, Err c ) -> 934 | Err (a ++ c) 935 | 936 | ( _, _, Err c ) -> 937 | Err c 938 | 939 | ( _, Err b, _ ) -> 940 | Err b 941 | 942 | ( Err a, _, _ ) -> 943 | Err a 944 | ) 945 | 946 | 947 | {-| Combine an arbitrary amount of fields. You provide a function that takes 948 | as many arguments as you need, then send it values by providing decoders with 949 | [`pipeline`](#pipeline). 950 | 951 | type alias Pet = 952 | { id : Int 953 | , name : String 954 | , species : String 955 | , weight : Float 956 | } 957 | 958 | petDecoder : Decoder Pet 959 | petDecoder = 960 | into Pet 961 | |> pipeline (column 0 int) 962 | |> pipeline (column 1 string) 963 | |> pipeline (column 2 string) 964 | |> pipeline (column 3 float) 965 | 966 | Now you can decode pets like this: 967 | 968 | decodeCsv NoFieldNames petDecoder "1,Atlas,cat,14\r\n2,Axel,puffin,1.37" 969 | --> Ok 970 | --> [ { id = 1, name = "Atlas", species = "cat", weight = 14 } 971 | --> , { id = 2, name = "Axel", species = "puffin", weight = 1.37 } 972 | --> ] 973 | 974 | -} 975 | into : (a -> b) -> Decoder (a -> b) 976 | into = 977 | succeed 978 | 979 | 980 | {-| See [`into`](#into). 981 | -} 982 | pipeline : Decoder a -> Decoder (a -> b) -> Decoder b 983 | pipeline = 984 | map2 (\value fn -> fn value) 985 | 986 | 987 | 988 | -- FANCY DECODING 989 | 990 | 991 | {-| Try several possible decoders in sequence, committing to the first one 992 | that passes. 993 | 994 | decodeCsv NoFieldNames 995 | (oneOf 996 | (map Just int) 997 | [ succeed Nothing ] 998 | ) 999 | "1" 1000 | --> Ok [ Just 1 ] 1001 | 1002 | decodeCsv NoFieldNames 1003 | (oneOf 1004 | (map Just int) 1005 | [ succeed Nothing ] 1006 | ) 1007 | "a" 1008 | --> Ok [ Nothing ] 1009 | 1010 | -} 1011 | oneOf : Decoder a -> List (Decoder a) -> Decoder a 1012 | oneOf first rest = 1013 | case rest of 1014 | [] -> 1015 | first 1016 | 1017 | next :: others -> 1018 | recover first (oneOf next others) 1019 | 1020 | 1021 | recover : Decoder a -> Decoder a -> Decoder a 1022 | recover (Decoder first) (Decoder second) = 1023 | Decoder <| 1024 | \location fieldNames rowNum row -> 1025 | case first location fieldNames rowNum row of 1026 | Ok value -> 1027 | Ok value 1028 | 1029 | Err errs -> 1030 | case second location fieldNames rowNum row of 1031 | Ok value -> 1032 | Ok value 1033 | 1034 | Err [ OneOfDecodingError _ problems ] -> 1035 | Err [ OneOfDecodingError rowNum (errs ++ problems) ] 1036 | 1037 | Err problems -> 1038 | Err [ OneOfDecodingError rowNum (errs ++ problems) ] 1039 | 1040 | 1041 | {-| Decode some value _and then_ make a decoding decision based on the 1042 | outcome. 
For example, if you wanted to reject negative numbers, you might 1043 | do something like this: 1044 | 1045 | positiveInt : Decoder Int 1046 | positiveInt = 1047 | int 1048 | |> andThen 1049 | (\rawInt -> 1050 | if rawInt < 0 then 1051 | Decode.fail "Only positive numbers allowed!" 1052 | 1053 | else 1054 | Decode.succeed rawInt 1055 | ) 1056 | 1057 | You could then use it like this: 1058 | 1059 | decodeCsv NoFieldNames positiveInt "1" -- Ok [ 1 ] 1060 | 1061 | decodeCsv NoFieldNames positiveInt "-1" 1062 | -- Err (DecodingErrors [ FieldDecodingError { row = 0, column = OnlyColumn, problem = Failure "Only positive numbers allowed!" } ]) 1063 | 1064 | -} 1065 | andThen : (a -> Decoder b) -> Decoder a -> Decoder b 1066 | andThen next (Decoder first) = 1067 | Decoder 1068 | (\location fieldNames rowNum row -> 1069 | first location fieldNames rowNum row 1070 | |> Result.andThen 1071 | (\nextValue -> 1072 | let 1073 | (Decoder final) = 1074 | next nextValue 1075 | in 1076 | final location fieldNames rowNum row 1077 | ) 1078 | ) 1079 | 1080 | 1081 | {-| Always succeed, no matter what. Mostly useful with [`andThen`](#andThen). 1082 | -} 1083 | succeed : a -> Decoder a 1084 | succeed value = 1085 | Decoder (\_ _ _ _ -> Ok value) 1086 | 1087 | 1088 | {-| Always fail with the given message, no matter what. Mostly useful with 1089 | [`andThen`](#andThen). 1090 | -} 1091 | fail : String -> Decoder a 1092 | fail message = 1093 | Decoder 1094 | (\location { names } rowNum _ -> 1095 | Err 1096 | [ FieldDecodingError 1097 | { row = rowNum 1098 | , column = locationToColumn names location 1099 | , problem = Failure message 1100 | } 1101 | ] 1102 | ) 1103 | 1104 | 1105 | {-| Make creating custom decoders a little easier. If you already have a 1106 | function that parses into something you care about, you can combine it with 1107 | this. 1108 | 1109 | For example, here's how you could parse a hexadecimal number with 1110 | [`rtfeldman/elm-hex`](https://package.elm-lang.org/packages/rtfeldman/elm-hex/latest/): 1111 | 1112 | import Hex 1113 | 1114 | hex : Decoder Int 1115 | hex = 1116 | andThen 1117 | (\value -> fromResult (Hex.fromString value)) 1118 | string 1119 | 1120 | decodeCsv NoFieldNames hex "ff" 1121 | --> Ok [ 255 ] 1122 | 1123 | -} 1124 | fromResult : Result String a -> Decoder a 1125 | fromResult result = 1126 | case result of 1127 | Ok great -> 1128 | succeed great 1129 | 1130 | Err problem -> 1131 | fail problem 1132 | 1133 | 1134 | {-| Like [`fromResult`](#fromResult) but you have to specify the error 1135 | message since `Nothing` has no further information. 1136 | 1137 | For example, you could implement something like [`int`](#int) using this: 1138 | 1139 | myInt : Decoder Int 1140 | myInt = 1141 | andThen 1142 | (\value -> 1143 | fromMaybe "Expected an int" 1144 | (String.toInt value) 1145 | ) 1146 | string 1147 | 1148 | decodeCsv NoFieldNames myInt "123" 1149 | --> Ok [ 123 ] 1150 | 1151 | (That said, you probably want to use [`int`](#int) instead... it has better 1152 | error messages and is more tolerant of unusual situations!)
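For completeness, here's a sketch of the failure case; the error shape below mirrors the one produced by [`fail`](#fail):

    decodeCsv NoFieldNames myInt "banana"
    --> Err
    --> (DecodingErrors
    -->     [ FieldDecodingError
    -->         { row = 0
    -->         , column = OnlyColumn
    -->         , problem = Failure "Expected an int"
    -->         }
    -->     ]
    --> )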
1153 | 1154 | -} 1155 | fromMaybe : String -> Maybe a -> Decoder a 1156 | fromMaybe problem maybe = 1157 | case maybe of 1158 | Just value -> 1159 | succeed value 1160 | 1161 | Nothing -> 1162 | fail problem 1163 | -------------------------------------------------------------------------------- /src/Csv/Encode.elm: -------------------------------------------------------------------------------- 1 | module Csv.Encode exposing (encode, Encoder, withFieldNames, withoutFieldNames) 2 | 3 | {-| 4 | 5 | @docs encode, Encoder, withFieldNames, withoutFieldNames 6 | 7 | -} 8 | 9 | import Dict 10 | 11 | 12 | {-| Describe how you want the output CSV to be shaped. Construct 13 | encoders with [`withFieldNames`](#withFieldNames) and 14 | [`withoutFieldNames`](#withoutFieldNames). 15 | -} 16 | type Encoder a 17 | = WithFieldNames (a -> List ( String, String )) 18 | | WithoutFieldNames (a -> List String) 19 | 20 | 21 | {-| When provided a function that maps field names to values, this function 22 | uses it to produce a perfectly rectangular CSV. 23 | 24 | [ ( "FF", "FF", "FF" ) 25 | , ( "80", "80", "80" ) 26 | , ( "00", "00", "00" ) 27 | ] 28 | |> encode 29 | { encoder = 30 | withFieldNames 31 | (\( r, g, b ) -> 32 | [ ( "red", r ) 33 | , ( "green", g ) 34 | , ( "blue", b ) 35 | ] 36 | ) 37 | , fieldSeparator = ',' 38 | } 39 | --> "red,green,blue\r\nFF,FF,FF\r\n80,80,80\r\n00,00,00" 40 | 41 | The ordering of columns is determined by the order of values returned from 42 | the function. 43 | 44 | - If the function returns fields in an inconsistent order, we will determine 45 | a final ordering based on the average position of each column. 46 | 47 | - If the function sometimes omits `(field, value)` pairs, we will leave 48 | fields blank to avoid generating a misaligned CSV. 49 | 50 | -} 51 | withFieldNames : (a -> List ( String, String )) -> Encoder a 52 | withFieldNames = 53 | WithFieldNames 54 | 55 | 56 | {-| Encode your data however you like. This is the "live an exciting adventure" 57 | encoder in that it will let you output rows with uneven lengths. 58 | 59 | [ ( "FF", "FF", "FF" ) 60 | , ( "80", "80", "80" ) 61 | , ( "00", "00", "00" ) 62 | ] 63 | |> encode 64 | { encoder = withoutFieldNames (\( r, g, b ) -> [ r, g, b ]) 65 | , fieldSeparator = ',' 66 | } 67 | --> "FF,FF,FF\r\n80,80,80\r\n00,00,00" 68 | 69 | -} 70 | withoutFieldNames : (a -> List String) -> Encoder a 71 | withoutFieldNames = 72 | WithoutFieldNames 73 | 74 | 75 | {-| Encode some data to a CSV string, quoting and escaping characters as 76 | necessary.
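For example, a minimal sketch with [`withoutFieldNames`](#withoutFieldNames):

    [ ( 1, "Atlas" ) ]
        |> encode
            { encoder = withoutFieldNames (\( id, name ) -> [ String.fromInt id, name ])
            , fieldSeparator = ','
            }
    --> "1,Atlas"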
77 | -} 78 | encode : 79 | { encoder : Encoder a 80 | , fieldSeparator : Char 81 | } 82 | -> List a 83 | -> String 84 | encode { encoder, fieldSeparator } items = 85 | let 86 | fieldSeparatorString : String 87 | fieldSeparatorString = 88 | String.fromChar fieldSeparator 89 | in 90 | items 91 | |> encodeItems encoder 92 | |> List.map (String.join fieldSeparatorString << List.map (quoteIfNecessary fieldSeparatorString)) 93 | |> String.join "\u{000D}\n" 94 | 95 | 96 | encodeItems : Encoder a -> List a -> List (List String) 97 | encodeItems encoder rows = 98 | case encoder of 99 | WithFieldNames convert -> 100 | let 101 | ( converted, namePositions ) = 102 | List.foldr 103 | (\row ( converted_, names ) -> 104 | let 105 | convertedRow : List ( String, String ) 106 | convertedRow = 107 | convert row 108 | in 109 | ( Dict.fromList convertedRow :: converted_ 110 | , List.foldl 111 | (\( name, _ ) ( soFar, column ) -> 112 | ( Dict.update name 113 | (\value -> 114 | case value of 115 | Just columns -> 116 | Just (column :: columns) 117 | 118 | Nothing -> 119 | Just [ column ] 120 | ) 121 | soFar 122 | , column + 1 123 | ) 124 | ) 125 | ( names, 0 ) 126 | convertedRow 127 | |> Tuple.first 128 | ) 129 | ) 130 | ( [], Dict.empty ) 131 | rows 132 | 133 | ordering : List String 134 | ordering = 135 | namePositions 136 | |> Dict.map (\_ positions -> List.sum positions / toFloat (List.length positions)) 137 | |> Dict.toList 138 | |> List.sortBy Tuple.second 139 | |> List.map Tuple.first 140 | in 141 | ordering 142 | :: List.map 143 | (\row -> 144 | List.map 145 | (\field -> Dict.get field row |> Maybe.withDefault "") 146 | ordering 147 | ) 148 | converted 149 | 150 | WithoutFieldNames convert -> 151 | List.map convert rows 152 | 153 | 154 | quoteIfNecessary : String -> String -> String 155 | quoteIfNecessary fieldSeparator value = 156 | if 157 | String.contains "\"" value 158 | || String.contains fieldSeparator value 159 | || String.contains "\u{000D}\n" value 160 | || String.contains "\n" value 161 | then 162 | "\"" ++ String.replace "\"" "\"\"" value ++ "\"" 163 | 164 | else 165 | value 166 | -------------------------------------------------------------------------------- /src/Csv/Parser.elm: -------------------------------------------------------------------------------- 1 | module Csv.Parser exposing (parse, Problem(..)) 2 | 3 | {-| CSV (and TSV) parsing. 4 | 5 | @docs parse, Problem 6 | 7 | -} 8 | 9 | 10 | {-| Something went wrong during parsing! What was it? 11 | 12 | - `SourceEndedWithoutClosingQuote`: we started parsing a quoted field, 13 | but the file ended before we saw a closing quote. If you meant to have 14 | a literal quote in your data, quote the whole field and then escape the 15 | literal quote by replacing it with `""`. For example, `": double prime` 16 | would be encoded as `""": double prime"`. 17 | - `AdditionalCharactersAfterClosingQuote`: we found the closing quote of a 18 | quoted field, but there was data after it and before a separator or the 19 | end of the file. Follow the quote-escaping advice above to get around this. 20 | 21 | -} 22 | type Problem 23 | = SourceEndedWithoutClosingQuote Int 24 | | AdditionalCharactersAfterClosingQuote Int 25 | 26 | 27 | {-| Turn a CSV string into a list of rows. Prefer using `Csv.Decode.decodeCsv` 28 | or `Csv.Decode.decodeCustom` unless you need something unusually custom (and 29 | please [open an issue](https://github.com/BrianHicks/elm-csv/issues/new) 30 | if so!)
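If you do need it, a minimal sketch of direct use looks like this:

    parse { fieldSeparator = ',' } "a,b\r\nc,d"
    --> Ok [ [ "a", "b" ], [ "c", "d" ] ]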
31 | -} 32 | parse : { fieldSeparator : Char } -> String -> Result Problem (List (List String)) 33 | parse config source = 34 | let 35 | fieldSeparator : String 36 | fieldSeparator = 37 | String.fromChar config.fieldSeparator 38 | 39 | finalLength : Int 40 | finalLength = 41 | String.length source 42 | 43 | parseQuotedField : (String -> Bool) -> String -> Int -> Int -> Result (Int -> Problem) ( String, Int, Bool ) 44 | parseQuotedField isFieldSeparator soFar startOffset endOffset = 45 | if endOffset - finalLength >= 0 then 46 | Err SourceEndedWithoutClosingQuote 47 | 48 | else if String.slice endOffset (endOffset + 1) source == "\"" then 49 | let 50 | segment : String 51 | segment = 52 | String.slice startOffset endOffset source 53 | in 54 | if (endOffset + 1) - finalLength >= 0 then 55 | Ok 56 | ( soFar ++ segment 57 | , endOffset + 1 58 | , False 59 | ) 60 | 61 | else 62 | let 63 | next : String 64 | next = 65 | String.slice (endOffset + 1) (endOffset + 2) source 66 | in 67 | if next == "\"" then 68 | -- "" is a quoted ". Unescape it and keep going. 69 | let 70 | newPos : Int 71 | newPos = 72 | endOffset + 2 73 | in 74 | parseQuotedField 75 | isFieldSeparator 76 | (soFar ++ segment ++ "\"") 77 | newPos 78 | newPos 79 | 80 | else if isFieldSeparator next then 81 | Ok 82 | ( soFar ++ segment 83 | , endOffset + 2 84 | , False 85 | ) 86 | 87 | else if next == "\n" then 88 | Ok 89 | ( soFar ++ segment 90 | , endOffset + 2 91 | , True 92 | ) 93 | 94 | else if next == "\u{000D}" && String.slice (endOffset + 2) (endOffset + 3) source == "\n" then 95 | Ok 96 | ( soFar ++ segment 97 | , endOffset + 3 98 | , True 99 | ) 100 | 101 | else 102 | Err AdditionalCharactersAfterClosingQuote 103 | 104 | else 105 | parseQuotedField isFieldSeparator soFar startOffset (endOffset + 1) 106 | 107 | parseHelp : (String -> Bool) -> List String -> List (List String) -> Int -> Int -> Result Problem (List (List String)) 108 | parseHelp isFieldSeparator row rows startOffset endOffset = 109 | if endOffset - finalLength >= 0 then 110 | let 111 | finalField : String 112 | finalField = 113 | String.slice startOffset endOffset source 114 | in 115 | if finalField == "" && row == [] then 116 | Ok (List.reverse rows) 117 | 118 | else 119 | Ok (List.reverse (List.reverse (finalField :: row) :: rows)) 120 | 121 | else 122 | let 123 | first : String 124 | first = 125 | String.slice endOffset (endOffset + 1) source 126 | in 127 | if isFieldSeparator first then 128 | let 129 | newPos : Int 130 | newPos = 131 | endOffset + 1 132 | in 133 | parseHelp 134 | isFieldSeparator 135 | (String.slice startOffset endOffset source :: row) 136 | rows 137 | newPos 138 | newPos 139 | 140 | else if first == "\n" then 141 | let 142 | newPos : Int 143 | newPos = 144 | endOffset + 1 145 | in 146 | parseHelp 147 | isFieldSeparator 148 | [] 149 | (List.reverse (String.slice startOffset endOffset source :: row) :: rows) 150 | newPos 151 | newPos 152 | 153 | else if first == "\u{000D}" && String.slice (endOffset + 1) (endOffset + 2) source == "\n" then 154 | let 155 | newPos : Int 156 | newPos = 157 | endOffset + 2 158 | in 159 | parseHelp 160 | isFieldSeparator 161 | [] 162 | (List.reverse (String.slice startOffset endOffset source :: row) :: rows) 163 | newPos 164 | newPos 165 | 166 | else if first == "\"" then 167 | let 168 | newPos : Int 169 | newPos = 170 | endOffset + 1 171 | in 172 | case parseQuotedField isFieldSeparator "" newPos newPos of 173 | Ok ( value, afterQuotedField, rowEnded ) -> 174 | if afterQuotedField >= finalLength then 175 | Ok 
(List.reverse (List.reverse (value :: row) :: rows)) 176 | 177 | else if rowEnded then 178 | parseHelp isFieldSeparator [] (List.reverse (value :: row) :: rows) afterQuotedField afterQuotedField 179 | 180 | else 181 | parseHelp isFieldSeparator (value :: row) rows afterQuotedField afterQuotedField 182 | 183 | Err problem -> 184 | Err (problem (List.length rows + 1)) 185 | 186 | else 187 | parseHelp 188 | isFieldSeparator 189 | row 190 | rows 191 | startOffset 192 | (endOffset + 1) 193 | 194 | {- This and `parseSemicolon` below are just specialized versions of 195 | `parseHelp` that produce more efficient generated code. The whole 196 | trick here is to compare to literals instead of variables, which 197 | makes the Elm compiler produce code that compares with `===` 198 | instead of a helper function that implements value-level comparison. 199 | 200 | To update these functions, just copy the body of `parseHelp`, then: 201 | 202 | 1. replace the calls to `isFieldSeparator` with literal equality 203 | checks (e.g. `first == ","` in `parseComma`.) 204 | 2. create a new `isFieldSeparator` to pass to `parseQuotedField` 205 | that does the same. 206 | 207 | Benchmark numbers without these functions ported will appear to 208 | be much slower, but it's fine to temporarily disable them in the 209 | bottom `if` of this function to fix bugs and stuff. 210 | -} 211 | parseComma : List String -> List (List String) -> Int -> Int -> Result Problem (List (List String)) 212 | parseComma row rows startOffset endOffset = 213 | if endOffset - finalLength >= 0 then 214 | let 215 | finalField : String 216 | finalField = 217 | String.slice startOffset endOffset source 218 | in 219 | if finalField == "" && row == [] then 220 | Ok (List.reverse rows) 221 | 222 | else 223 | Ok (List.reverse (List.reverse (finalField :: row) :: rows)) 224 | 225 | else 226 | let 227 | first : String 228 | first = 229 | String.slice endOffset (endOffset + 1) source 230 | in 231 | if first == "," then 232 | let 233 | newPos : Int 234 | newPos = 235 | endOffset + 1 236 | in 237 | parseComma 238 | (String.slice startOffset endOffset source :: row) 239 | rows 240 | newPos 241 | newPos 242 | 243 | else if first == "\n" then 244 | let 245 | newPos : Int 246 | newPos = 247 | endOffset + 1 248 | in 249 | parseComma 250 | [] 251 | (List.reverse (String.slice startOffset endOffset source :: row) :: rows) 252 | newPos 253 | newPos 254 | 255 | else if first == "\u{000D}" && String.slice (endOffset + 1) (endOffset + 2) source == "\n" then 256 | let 257 | newPos : Int 258 | newPos = 259 | endOffset + 2 260 | in 261 | parseComma 262 | [] 263 | (List.reverse (String.slice startOffset endOffset source :: row) :: rows) 264 | newPos 265 | newPos 266 | 267 | else if first == "\"" then 268 | let 269 | newPos : Int 270 | newPos = 271 | endOffset + 1 272 | in 273 | case parseQuotedField (\c -> c == ",") "" newPos newPos of 274 | Ok ( value, afterQuotedField, rowEnded ) -> 275 | if afterQuotedField >= finalLength then 276 | Ok (List.reverse (List.reverse (value :: row) :: rows)) 277 | 278 | else if rowEnded then 279 | parseComma [] (List.reverse (value :: row) :: rows) afterQuotedField afterQuotedField 280 | 281 | else 282 | parseComma (value :: row) rows afterQuotedField afterQuotedField 283 | 284 | Err problem -> 285 | Err (problem (List.length rows + 1)) 286 | 287 | else 288 | parseComma 289 | row 290 | rows 291 | startOffset 292 | (endOffset + 1) 293 | 294 | parseSemicolon : List String -> List (List String) -> Int -> Int -> Result Problem (List (List 
String)) 295 | parseSemicolon row rows startOffset endOffset = 296 | if endOffset - finalLength >= 0 then 297 | let 298 | finalField : String 299 | finalField = 300 | String.slice startOffset endOffset source 301 | in 302 | if finalField == "" && row == [] then 303 | Ok (List.reverse rows) 304 | 305 | else 306 | Ok (List.reverse (List.reverse (finalField :: row) :: rows)) 307 | 308 | else 309 | let 310 | first : String 311 | first = 312 | String.slice endOffset (endOffset + 1) source 313 | in 314 | if first == ";" then 315 | let 316 | newPos : Int 317 | newPos = 318 | endOffset + 1 319 | in 320 | parseSemicolon 321 | (String.slice startOffset endOffset source :: row) 322 | rows 323 | newPos 324 | newPos 325 | 326 | else if first == "\n" then 327 | let 328 | newPos : Int 329 | newPos = 330 | endOffset + 1 331 | in 332 | parseSemicolon 333 | [] 334 | (List.reverse (String.slice startOffset endOffset source :: row) :: rows) 335 | newPos 336 | newPos 337 | 338 | else if first == "\u{000D}" && String.slice (endOffset + 1) (endOffset + 2) source == "\n" then 339 | let 340 | newPos : Int 341 | newPos = 342 | endOffset + 2 343 | in 344 | parseSemicolon 345 | [] 346 | (List.reverse (String.slice startOffset endOffset source :: row) :: rows) 347 | newPos 348 | newPos 349 | 350 | else if first == "\"" then 351 | let 352 | newPos : Int 353 | newPos = 354 | endOffset + 1 355 | in 356 | case parseQuotedField (\c -> c == ";") "" newPos newPos of 357 | Ok ( value, afterQuotedField, rowEnded ) -> 358 | if afterQuotedField >= finalLength then 359 | Ok (List.reverse (List.reverse (value :: row) :: rows)) 360 | 361 | else if rowEnded then 362 | parseSemicolon [] (List.reverse (value :: row) :: rows) afterQuotedField afterQuotedField 363 | 364 | else 365 | parseSemicolon (value :: row) rows afterQuotedField afterQuotedField 366 | 367 | Err problem -> 368 | Err (problem (List.length rows + 1)) 369 | 370 | else 371 | parseSemicolon 372 | row 373 | rows 374 | startOffset 375 | (endOffset + 1) 376 | in 377 | if String.isEmpty source then 378 | Ok [] 379 | 380 | else if config.fieldSeparator == ',' then 381 | parseComma [] [] 0 0 382 | 383 | else if config.fieldSeparator == ';' then 384 | parseSemicolon [] [] 0 0 385 | 386 | else 387 | parseHelp (\s -> s == fieldSeparator) [] [] 0 0 388 | -------------------------------------------------------------------------------- /tests/Csv/DecodeTest.elm: -------------------------------------------------------------------------------- 1 | module Csv.DecodeTest exposing (..) 2 | 3 | import Csv.Decode as Decode exposing (Column(..), Decoder, Error(..), FieldNames(..)) 4 | import Expect 5 | import Hex 6 | import Test exposing (..) 
7 | 8 | 9 | stringTest : Test 10 | stringTest = 11 | describe "string" 12 | [ test "a blank string" <| 13 | \_ -> 14 | "\"\"" 15 | |> Decode.decodeCsv NoFieldNames Decode.string 16 | |> Expect.equal (Ok [ "" ]) 17 | , test "an unquoted value" <| 18 | \_ -> 19 | "a" 20 | |> Decode.decodeCsv NoFieldNames Decode.string 21 | |> Expect.equal (Ok [ "a" ]) 22 | , test "an integer" <| 23 | \_ -> 24 | "1" 25 | |> Decode.decodeCsv NoFieldNames Decode.string 26 | |> Expect.equal (Ok [ "1" ]) 27 | , test "multiple columns" <| 28 | \_ -> 29 | "1,2" 30 | |> Decode.decodeCsv NoFieldNames Decode.string 31 | |> Expect.equal 32 | (Err 33 | (DecodingErrors 34 | [ Decode.FieldDecodingError 35 | { row = 0 36 | , column = OnlyColumn 37 | , problem = Decode.ExpectedOneColumn 2 38 | } 39 | ] 40 | ) 41 | ) 42 | ] 43 | 44 | 45 | intTest : Test 46 | intTest = 47 | describe "int" 48 | [ test "a valid integer" <| 49 | \_ -> 50 | "1" 51 | |> Decode.decodeCsv NoFieldNames Decode.int 52 | |> Expect.equal (Ok [ 1 ]) 53 | , test "an integer with spaces around" <| 54 | \_ -> 55 | " 1 " 56 | |> Decode.decodeCsv NoFieldNames Decode.int 57 | |> Expect.equal (Ok [ 1 ]) 58 | , test "an invalid integer" <| 59 | \_ -> 60 | "a" 61 | |> Decode.decodeCsv NoFieldNames Decode.int 62 | |> Expect.equal 63 | (Err 64 | (DecodingErrors 65 | [ Decode.FieldDecodingError 66 | { row = 0 67 | , column = OnlyColumn 68 | , problem = Decode.ExpectedInt "a" 69 | } 70 | ] 71 | ) 72 | ) 73 | , test "multiple columns" <| 74 | \_ -> 75 | "1,2" 76 | |> Decode.decodeCsv NoFieldNames Decode.int 77 | |> Expect.equal 78 | (Err 79 | (DecodingErrors 80 | [ Decode.FieldDecodingError 81 | { row = 0 82 | , column = OnlyColumn 83 | , problem = Decode.ExpectedOneColumn 2 84 | } 85 | ] 86 | ) 87 | ) 88 | ] 89 | 90 | 91 | floatTest : Test 92 | floatTest = 93 | describe "float" 94 | [ test "a float shaped like an integer" <| 95 | \_ -> 96 | "1" 97 | |> Decode.decodeCsv NoFieldNames Decode.float 98 | |> Expect.equal (Ok [ 1.0 ]) 99 | , test "a float shaped like a floating-point number" <| 100 | \_ -> 101 | "3.14" 102 | |> Decode.decodeCsv NoFieldNames Decode.float 103 | |> Expect.equal (Ok [ 3.14 ]) 104 | , test "a float with spaces around" <| 105 | \_ -> 106 | " 3.14 " 107 | |> Decode.decodeCsv NoFieldNames Decode.float 108 | |> Expect.equal (Ok [ 3.14 ]) 109 | , test "an invalid float" <| 110 | \_ -> 111 | "a" 112 | |> Decode.decodeCsv NoFieldNames Decode.float 113 | |> Expect.equal 114 | (Err 115 | (DecodingErrors 116 | [ Decode.FieldDecodingError 117 | { row = 0 118 | , column = OnlyColumn 119 | , problem = Decode.ExpectedFloat "a" 120 | } 121 | ] 122 | ) 123 | ) 124 | , test "multiple columns" <| 125 | \_ -> 126 | "1,2" 127 | |> Decode.decodeCsv NoFieldNames Decode.float 128 | |> Expect.equal 129 | (Err 130 | (DecodingErrors 131 | [ Decode.FieldDecodingError 132 | { row = 0 133 | , column = OnlyColumn 134 | , problem = Decode.ExpectedOneColumn 2 135 | } 136 | ] 137 | ) 138 | ) 139 | ] 140 | 141 | 142 | blankTest : Test 143 | blankTest = 144 | describe "blank" 145 | [ test "when the field is blank" <| 146 | \_ -> 147 | "" 148 | |> Decode.decodeCsv NoFieldNames (Decode.blank Decode.int) 149 | |> Expect.equal (Ok []) 150 | , test "when the field contains spaces" <| 151 | \_ -> 152 | " " 153 | |> Decode.decodeCsv NoFieldNames (Decode.blank Decode.int) 154 | |> Expect.equal (Ok [ Nothing ]) 155 | , test "when the field contains whitespace characters" <| 156 | \_ -> 157 | "\"\u{00A0}\t\n\"" 158 | |> Decode.decodeCsv NoFieldNames (Decode.blank Decode.int) 159 | |>
Expect.equal (Ok [ Nothing ]) 160 | , test "when the field is non-blank but not valid for the decoder" <| 161 | \_ -> 162 | "banana" 163 | |> Decode.decodeCsv NoFieldNames (Decode.blank Decode.int) 164 | |> Expect.equal 165 | (Err 166 | (DecodingErrors 167 | [ Decode.FieldDecodingError 168 | { row = 0 169 | , column = OnlyColumn 170 | , problem = Decode.ExpectedInt "banana" 171 | } 172 | ] 173 | ) 174 | ) 175 | , test "when the field is non-blank and valid for the decoder" <| 176 | \_ -> 177 | "1" 178 | |> Decode.decodeCsv NoFieldNames (Decode.blank Decode.int) 179 | |> Expect.equal (Ok [ Just 1 ]) 180 | ] 181 | 182 | 183 | columnTest : Test 184 | columnTest = 185 | describe "column" 186 | [ test "can get the only column" <| 187 | \_ -> 188 | "a" 189 | |> Decode.decodeCsv NoFieldNames (Decode.column 0 Decode.string) 190 | |> Expect.ok 191 | , test "can get an arbitrary column" <| 192 | \_ -> 193 | "a,b,c" 194 | |> Decode.decodeCsv NoFieldNames (Decode.column 1 Decode.string) 195 | |> Expect.equal (Ok [ "b" ]) 196 | , test "issues an error if the column doesn't exist" <| 197 | \_ -> 198 | "a" 199 | |> Decode.decodeCsv NoFieldNames (Decode.column 1 Decode.string) 200 | |> Expect.equal 201 | (Err 202 | (DecodingErrors 203 | [ Decode.FieldDecodingError 204 | { row = 0 205 | , column = Column 1 206 | , problem = Decode.ColumnNotFound 1 207 | } 208 | ] 209 | ) 210 | ) 211 | ] 212 | 213 | 214 | fieldTest : Test 215 | fieldTest = 216 | describe "field" 217 | [ test "fails when no field names are provided or present" <| 218 | \_ -> 219 | "a" 220 | |> Decode.decodeCsv NoFieldNames (Decode.field "Name" Decode.string) 221 | |> Expect.equal 222 | (Err 223 | (DecodingErrors [ Decode.FieldNotProvided "Name" ]) 224 | ) 225 | , test "fails when the provided headers don't contain the name" <| 226 | \_ -> 227 | "a" 228 | |> Decode.decodeCsv 229 | (CustomFieldNames []) 230 | (Decode.field "Name" Decode.string) 231 | |> Expect.equal 232 | (Err 233 | (DecodingErrors 234 | [ Decode.FieldNotProvided "Name" 235 | ] 236 | ) 237 | ) 238 | , test "fails when the first row doesn't contain the name" <| 239 | \_ -> 240 | "Blah\u{000D}\na" 241 | |> Decode.decodeCsv 242 | FieldNamesFromFirstRow 243 | (Decode.field "Name" Decode.string) 244 | |> Expect.equal 245 | (Err 246 | (DecodingErrors 247 | [ Decode.FieldNotProvided "Name" ] 248 | ) 249 | ) 250 | , test "fails when there is no first row" <| 251 | \_ -> 252 | "" 253 | |> Decode.decodeCsv 254 | FieldNamesFromFirstRow 255 | (Decode.field "Name" Decode.string) 256 | |> Expect.equal (Err Decode.NoFieldNamesOnFirstRow) 257 | , test "fails when name is not present in the first row" <| 258 | \_ -> 259 | "Bad\u{000D}\nAtlas" 260 | |> Decode.decodeCsv 261 | FieldNamesFromFirstRow 262 | (Decode.field "Name" Decode.string) 263 | |> Expect.equal 264 | (Err 265 | (DecodingErrors 266 | [ Decode.FieldNotProvided "Name" ] 267 | ) 268 | ) 269 | , test "fails when the associated column is not present in the row" <| 270 | \_ -> 271 | "Name,Other\u{000D}\nAtlas" 272 | |> Decode.decodeCsv 273 | FieldNamesFromFirstRow 274 | (Decode.field "Other" Decode.string) 275 | |> Expect.equal 276 | (Err 277 | (DecodingErrors 278 | [ Decode.FieldDecodingError 279 | { row = 1 280 | , column = Field "Other" (Just 1) 281 | , problem = Decode.FieldNotFound "Other" 282 | } 283 | ] 284 | ) 285 | ) 286 | , test "retrieves the field from custom-provided fields" <| 287 | \_ -> 288 | "a" 289 | |> Decode.decodeCsv 290 | (CustomFieldNames [ "Name" ]) 291 | (Decode.field "Name" Decode.string) 292 | |> Expect.equal 
(Ok [ "a" ]) 293 | , test "uses the headers on the first row, if present" <| 294 | \_ -> 295 | "Name\u{000D}\nAtlas" 296 | |> Decode.decodeCsv 297 | FieldNamesFromFirstRow 298 | (Decode.field "Name" Decode.string) 299 | |> Expect.equal 300 | (Ok [ "Atlas" ]) 301 | , test "uses the headers on the first row, trimmed" <| 302 | \_ -> 303 | " Name \u{000D}\nAtlas" 304 | |> Decode.decodeCsv 305 | FieldNamesFromFirstRow 306 | (Decode.field "Name" Decode.string) 307 | |> Expect.equal 308 | (Ok [ "Atlas" ]) 309 | , test "fails with the right line number after getting field names from the first row" <| 310 | \_ -> 311 | "Number\u{000D}\nnot a number" 312 | |> Decode.decodeCsv 313 | FieldNamesFromFirstRow 314 | (Decode.field "Number" Decode.int) 315 | |> Expect.equal 316 | (Err 317 | (DecodingErrors 318 | [ Decode.FieldDecodingError 319 | { row = 1 320 | , column = Field "Number" (Just 0) 321 | , problem = Decode.ExpectedInt "not a number" 322 | } 323 | ] 324 | ) 325 | ) 326 | ] 327 | 328 | 329 | mapTest : Test 330 | mapTest = 331 | describe "map functions" 332 | [ test "can map a single value" <| 333 | \_ -> 334 | "5" 335 | |> Decode.decodeCsv NoFieldNames (Decode.column 0 Decode.int |> Decode.map (\i -> i * 2)) 336 | |> Expect.equal (Ok [ 10 ]) 337 | , test "map2" <| 338 | \_ -> 339 | "1,Atlas" 340 | |> Decode.decodeCsv NoFieldNames 341 | (Decode.map2 Tuple.pair 342 | (Decode.column 0 Decode.int) 343 | (Decode.column 1 Decode.string) 344 | ) 345 | |> Expect.equal 346 | (Ok [ ( 1, "Atlas" ) ]) 347 | , test "map3" <| 348 | \_ -> 349 | "1,Atlas,Cat" 350 | |> Decode.decodeCsv NoFieldNames 351 | (Decode.map3 (\id name species -> ( id, name, species )) 352 | (Decode.column 0 Decode.int) 353 | (Decode.column 1 Decode.string) 354 | (Decode.column 2 Decode.string) 355 | ) 356 | |> Expect.equal 357 | (Ok [ ( 1, "Atlas", "Cat" ) ]) 358 | ] 359 | 360 | 361 | oneOfTest : Test 362 | oneOfTest = 363 | describe "oneOf" 364 | [ test "decodes a value" <| 365 | \_ -> 366 | "1" 367 | |> Decode.decodeCsv NoFieldNames (Decode.oneOf Decode.int []) 368 | |> Expect.equal (Ok [ 1 ]) 369 | , test "uses a fallback" <| 370 | \_ -> 371 | "a" 372 | |> Decode.decodeCsv NoFieldNames 373 | (Decode.oneOf 374 | (Decode.map Just Decode.int) 375 | [ Decode.succeed Nothing ] 376 | ) 377 | |> Expect.equal (Ok [ Nothing ]) 378 | , test "gives all the errors if all the decoders fail" <| 379 | \_ -> 380 | "a" 381 | |> Decode.decodeCsv NoFieldNames 382 | (Decode.oneOf 383 | (Decode.fail "ONE") 384 | [ Decode.fail "TWO" 385 | , Decode.fail "THREE" 386 | ] 387 | ) 388 | |> Expect.equal 389 | (Err 390 | (DecodingErrors 391 | [ Decode.OneOfDecodingError 0 392 | [ Decode.FieldDecodingError 393 | { row = 0 394 | , column = OnlyColumn 395 | , problem = Decode.Failure "ONE" 396 | } 397 | , Decode.FieldDecodingError 398 | { row = 0 399 | , column = OnlyColumn 400 | , problem = Decode.Failure "TWO" 401 | } 402 | , Decode.FieldDecodingError 403 | { row = 0 404 | , column = OnlyColumn 405 | , problem = Decode.Failure "THREE" 406 | } 407 | ] 408 | ] 409 | ) 410 | ) 411 | ] 412 | 413 | 414 | succeedTest : Test 415 | succeedTest = 416 | describe "succeed" 417 | [ test "ignores the values you send it in favor of the value you provide" <| 418 | \_ -> 419 | "a" 420 | |> Decode.decodeCsv NoFieldNames (Decode.succeed ()) 421 | |> Expect.equal (Ok [ () ]) 422 | , test "provides one value for each row" <| 423 | \_ -> 424 | "a\u{000D}\nb" 425 | |> Decode.decodeCsv NoFieldNames (Decode.succeed ()) 426 | |> Expect.equal (Ok [ (), () ]) 427 | ] 428 | 429 | 430 | 
failTest : Test 431 | failTest = 432 | describe "fail" 433 | [ test "ignores the values you send it in favor of the message you provide" <| 434 | \_ -> 435 | "a" 436 | |> Decode.decodeCsv NoFieldNames (Decode.fail "a nice description") 437 | |> Expect.equal 438 | (Err 439 | (DecodingErrors 440 | [ Decode.FieldDecodingError 441 | { row = 0 442 | , column = OnlyColumn 443 | , problem = Decode.Failure "a nice description" 444 | } 445 | ] 446 | ) 447 | ) 448 | , test "fails on every row where it's attempted" <| 449 | \_ -> 450 | "a\u{000D}\nb" 451 | |> Decode.decodeCsv NoFieldNames (Decode.fail "a nice description") 452 | |> Expect.equal 453 | (Err 454 | (DecodingErrors 455 | [ Decode.FieldDecodingError 456 | { row = 0 457 | , column = OnlyColumn 458 | , problem = Decode.Failure "a nice description" 459 | } 460 | , Decode.FieldDecodingError 461 | { row = 1 462 | , column = OnlyColumn 463 | , problem = Decode.Failure "a nice description" 464 | } 465 | ] 466 | ) 467 | ) 468 | ] 469 | 470 | 471 | andThenTest : Test 472 | andThenTest = 473 | describe "andThen" 474 | [ describe "for validation" <| 475 | let 476 | positiveInteger : Decoder Int 477 | positiveInteger = 478 | Decode.andThen 479 | (\value -> 480 | if value > 0 then 481 | Decode.succeed value 482 | 483 | else 484 | Decode.fail "Only positive integers are allowed!" 485 | ) 486 | Decode.int 487 | in 488 | [ test "allows positive integers" <| 489 | \_ -> 490 | "1" 491 | |> Decode.decodeCsv NoFieldNames positiveInteger 492 | |> Expect.equal (Ok [ 1 ]) 493 | , test "disallows negative integers" <| 494 | \_ -> 495 | "-1" 496 | |> Decode.decodeCsv NoFieldNames positiveInteger 497 | |> Expect.equal 498 | (Err 499 | (DecodingErrors 500 | [ Decode.FieldDecodingError 501 | { row = 0 502 | , column = OnlyColumn 503 | , problem = Decode.Failure "Only positive integers are allowed!"
504 | } 505 | ] 506 | ) 507 | ) 508 | ] 509 | , describe "for fields depending on each other" <| 510 | let 511 | followThePointer : Decoder String 512 | followThePointer = 513 | Decode.column 0 Decode.int 514 | |> Decode.andThen (\column -> Decode.column column Decode.string) 515 | in 516 | [ test "get the second column" <| 517 | \_ -> 518 | "1,a,b" 519 | |> Decode.decodeCsv NoFieldNames followThePointer 520 | |> Expect.equal (Ok [ "a" ]) 521 | , test "get the third column" <| 522 | \_ -> 523 | "2,a,b" 524 | |> Decode.decodeCsv NoFieldNames followThePointer 525 | |> Expect.equal (Ok [ "b" ]) 526 | , test "has a reasonable error message for missing a column" <| 527 | \_ -> 528 | "3,a,b" 529 | |> Decode.decodeCsv NoFieldNames followThePointer 530 | |> Expect.equal 531 | (Err 532 | (DecodingErrors 533 | [ Decode.FieldDecodingError 534 | { row = 0 535 | , column = Column 3 536 | , problem = Decode.ColumnNotFound 3 537 | } 538 | ] 539 | ) 540 | ) 541 | ] 542 | ] 543 | 544 | 545 | fromResultTest : Test 546 | fromResultTest = 547 | let 548 | hex : Decoder Int 549 | hex = 550 | Decode.string 551 | |> Decode.andThen (Decode.fromResult << Hex.fromString) 552 | in 553 | describe "fromResult" 554 | [ test "succeeds when the function returns Ok" <| 555 | \_ -> 556 | "ff" 557 | |> Decode.decodeCsv NoFieldNames hex 558 | |> Expect.equal (Ok [ 255 ]) 559 | , test "fails when the function returns Err" <| 560 | \_ -> 561 | "banana" 562 | |> Decode.decodeCsv NoFieldNames hex 563 | |> Expect.equal 564 | (Err 565 | (DecodingErrors 566 | [ Decode.FieldDecodingError 567 | { row = 0 568 | , column = OnlyColumn 569 | , problem = Decode.Failure "\"banana\" is not a valid hexadecimal string because n is not a valid hexadecimal character." 570 | } 571 | ] 572 | ) 573 | ) 574 | ] 575 | 576 | 577 | fromMaybeTest : Test 578 | fromMaybeTest = 579 | let 580 | myInt : Decoder Int 581 | myInt = 582 | Decode.string 583 | |> Decode.andThen (Decode.fromMaybe "Expected an int" << String.toInt) 584 | in 585 | describe "fromMaybe" 586 | [ test "succeeds when the function returns Just" <| 587 | \_ -> 588 | "123" 589 | |> Decode.decodeCsv NoFieldNames myInt 590 | |> Expect.equal (Ok [ 123 ]) 591 | , test "fails when the function returns Nothing" <| 592 | \_ -> 593 | "banana" 594 | |> Decode.decodeCsv NoFieldNames myInt 595 | |> Expect.equal 596 | (Err 597 | (DecodingErrors 598 | [ Decode.FieldDecodingError 599 | { row = 0 600 | , column = OnlyColumn 601 | , problem = Decode.Failure "Expected an int" 602 | } 603 | ] 604 | ) 605 | ) 606 | ] 607 | 608 | 609 | testErrorToString : Test 610 | testErrorToString = 611 | describe "errorToString" 612 | [ test "groups simple errors" <| 613 | \() -> 614 | "a\na\na" 615 | |> Decode.decodeCsv NoFieldNames Decode.int 616 | |> Result.mapError Decode.errorToString 617 | |> Expect.equal (Err "There was a problem on rows 0–2, column 0 (the only column present): I could not parse an int from `a`.") 618 | , test "groups simple errors in more complex settings - errors are sorted by row occurrence" <| 619 | \() -> 620 | "a\na\nb\na\na" 621 | |> Decode.decodeCsv NoFieldNames Decode.int 622 | |> Result.mapError Decode.errorToString 623 | |> Expect.equal (Err "I saw 3 problems while decoding this CSV:\n\nThere was a problem on rows 0 and 1, column 0 (the only column present): I could not parse an int from `a`.\n\nThere was a problem on row 2, column 0 (the only column present): I could not parse an int from `b`.\n\nThere was a problem on rows 3 and 4, column 0 (the only column present): I could not parse an
int from `a`.") 624 | , test "works with map2" <| 625 | \() -> 626 | "foo,bar\na,2\na,b\na,c" 627 | |> Decode.decodeCsv FieldNamesFromFirstRow (Decode.map2 Tuple.pair (Decode.field "foo" Decode.int) (Decode.field "bar" Decode.int)) 628 | |> Result.mapError Decode.errorToString 629 | |> Expect.equal (Err "I saw 3 problems while decoding this CSV:\n\nThere was a problem on rows 1–3, in the `foo` field (column 0): I could not parse an int from `a`.\n\nThere was a problem on row 2, in the `bar` field (column 1): I could not parse an int from `b`.\n\nThere was a problem on row 3, in the `bar` field (column 1): I could not parse an int from `c`.") 630 | , test "works with oneOf" <| 631 | \() -> 632 | "a\n1\n1.2" 633 | |> Decode.decodeCsv NoFieldNames (Decode.oneOf (Decode.map toFloat Decode.int) [ Decode.float ]) 634 | |> Result.mapError Decode.errorToString 635 | |> Expect.equal (Err "There was a problem on row 0 - all of the following decoders failed, but at least one must succeed:\n (1) column 0 (the only column present): I could not parse an int from `a`.\n (2) column 0 (the only column present): I could not parse a float from `a`.") 636 | , test "works with nested oneOf" <| 637 | \() -> 638 | "a\n1\n1.2" 639 | |> Decode.decodeCsv NoFieldNames (Decode.oneOf Decode.float [ Decode.oneOf (Decode.map toFloat Decode.int) [ Decode.float ] ]) 640 | |> Result.mapError Decode.errorToString 641 | |> Expect.equal (Err "There was a problem on row 0 - all of the following decoders failed, but at least one must succeed:\n (1) column 0 (the only column present): I could not parse a float from `a`.\n (2) column 0 (the only column present): I could not parse an int from `a`.\n (3) column 0 (the only column present): I could not parse a float from `a`.") 642 | , test "works with complex decoder" <| 643 | \() -> 644 | "foo,bar\na,2\na,b\na,c" 645 | |> Decode.decodeCsv FieldNamesFromFirstRow (Decode.map2 Tuple.pair (Decode.oneOf (Decode.field "foo" Decode.int) [ Decode.field "bar" (Decode.map round Decode.float) ]) (Decode.field "bar" Decode.int)) 646 | |> Result.mapError Decode.errorToString 647 | |> Expect.equal (Err "I saw 4 problems while decoding this CSV:\n\nThere was a problem on row 2, in the `bar` field (column 1): I could not parse an int from `b`.\n\nThere was a problem on row 2 - all of the following decoders failed, but at least one must succeed:\n (1) in the `foo` field (column 0): I could not parse an int from `a`.\n (2) in the `bar` field (column 1): I could not parse a float from `b`.\n\nThere was a problem on row 3, in the `bar` field (column 1): I could not parse an int from `c`.\n\nThere was a problem on row 3 - all of the following decoders failed, but at least one must succeed:\n (1) in the `foo` field (column 0): I could not parse an int from `a`.\n (2) in the `bar` field (column 1): I could not parse a float from `c`.") 648 | ] 649 | 650 | 651 | availableFieldsTest : Test 652 | availableFieldsTest = 653 | describe "availableFields" 654 | [ test "returns header row in order" <| 655 | \() -> 656 | "foo,bar\na,a\na,b\na,c" 657 | |> Decode.decodeCsv FieldNamesFromFirstRow Decode.availableFields 658 | |> Expect.equal (Ok [ [ "foo", "bar" ], [ "foo", "bar" ], [ "foo", "bar" ] ]) 659 | , test "allows conditional decoding based on header row" <| 660 | \() -> 661 | "foo,bar\na,a\na,b\na,c" 662 | |> Decode.decodeCsv FieldNamesFromFirstRow 663 | (Decode.availableFields 664 | |> Decode.andThen 665 | (\headers -> 666 | if List.member "bar" headers then 667 | Decode.field "bar" Decode.string 668 | 669 
| else 670 | Decode.field "foo" Decode.string 671 | ) 672 | ) 673 | |> Expect.equal (Ok [ "a", "b", "c" ]) 674 | , test "returns configured fields" <| 675 | \() -> 676 | "\n" 677 | |> Decode.decodeCsv (CustomFieldNames [ "Foo", "Bar" ]) Decode.availableFields 678 | |> Expect.equal (Ok [ [ "Foo", "Bar" ] ]) 679 | , test "fails when no named fields" <| 680 | \() -> 681 | "\n" 682 | |> Decode.decodeCsv NoFieldNames Decode.availableFields 683 | |> Result.mapError Decode.errorToString 684 | |> Expect.equal (Err "Asked for available fields, but none were provided") 685 | ] 686 | -------------------------------------------------------------------------------- /tests/Csv/EncodeTest.elm: -------------------------------------------------------------------------------- 1 | module Csv.EncodeTest exposing (..) 2 | 3 | import Csv.Encode as Encode 4 | import Csv.Parser as Parser 5 | import Expect 6 | import Fuzz exposing (Fuzzer) 7 | import Test exposing (..) 8 | 9 | 10 | encodeTest : Test 11 | encodeTest = 12 | let 13 | pets : List { id : Int, name : String, species : String } 14 | pets = 15 | [ { id = 1, name = "Atlas", species = "cat" } 16 | , { id = 2, name = "Axel", species = "puffin" } 17 | ] 18 | in 19 | describe "encode" 20 | [ describe "without field names" <| 21 | [ test "encodes without field names" <| 22 | \_ -> 23 | pets 24 | |> Encode.encode 25 | { encoder = 26 | Encode.withoutFieldNames 27 | (\{ id, name, species } -> 28 | [ String.fromInt id 29 | , name 30 | , species 31 | ] 32 | ) 33 | , fieldSeparator = ',' 34 | } 35 | |> Expect.equal "1,Atlas,cat\u{000D}\n2,Axel,puffin" 36 | , test "escapes quotes" <| 37 | \_ -> 38 | [ "\"" ] 39 | |> Encode.encode 40 | { encoder = Encode.withoutFieldNames List.singleton 41 | , fieldSeparator = ',' 42 | } 43 | |> Expect.equal "\"\"\"\"" 44 | , test "escapes field separators (comma)" <| 45 | \_ -> 46 | [ "," ] 47 | |> Encode.encode 48 | { encoder = Encode.withoutFieldNames List.singleton 49 | , fieldSeparator = ',' 50 | } 51 | |> Expect.equal "\",\"" 52 | , test "escapes field separators (semicolon)" <| 53 | \_ -> 54 | [ ";" ] 55 | |> Encode.encode 56 | { encoder = Encode.withoutFieldNames List.singleton 57 | , fieldSeparator = ';' 58 | } 59 | |> Expect.equal "\";\"" 60 | , test "escapes newlines" <| 61 | \_ -> 62 | [ "\n" ] 63 | |> Encode.encode 64 | { encoder = Encode.withoutFieldNames List.singleton 65 | , fieldSeparator = ',' 66 | } 67 | |> Expect.equal "\"\n\"" 68 | , test "escapes row separators" <| 69 | \_ -> 70 | [ "\u{000D}\n" ] 71 | |> Encode.encode 72 | { encoder = Encode.withoutFieldNames List.singleton 73 | , fieldSeparator = ',' 74 | } 75 | |> Expect.equal "\"\u{000D}\n\"" 76 | ] 77 | , describe "with field names" 78 | [ test "encodes with field names" <| 79 | \_ -> 80 | pets 81 | |> Encode.encode 82 | { encoder = 83 | Encode.withFieldNames 84 | (\{ id, name, species } -> 85 | [ ( "ID", String.fromInt id ) 86 | , ( "Name", name ) 87 | , ( "Species", species ) 88 | ] 89 | ) 90 | , fieldSeparator = ',' 91 | } 92 | |> Expect.equal "ID,Name,Species\u{000D}\n1,Atlas,cat\u{000D}\n2,Axel,puffin" 93 | , test "uses a blank character when a field name is missing" <| 94 | \_ -> 95 | pets 96 | |> Encode.encode 97 | { encoder = 98 | Encode.withFieldNames 99 | (\{ id, name, species } -> 100 | if id == 1 then 101 | [ ( "ID", String.fromInt id ) 102 | , ( "Species", species ) 103 | ] 104 | 105 | else 106 | [ ( "ID", String.fromInt id ) 107 | , ( "Name", name ) 108 | , ( "Species", species ) 109 | ] 110 | ) 111 | , fieldSeparator = ',' 112 | } 113 | |> 
Expect.equal "ID,Name,Species\u{000D}\n1,,cat\u{000D}\n2,Axel,puffin" 114 | , test "uses the average of the field position when fields are sorted differently in different rows" <| 115 | \_ -> 116 | [ { id = "a", name = "z" } 117 | , { id = "z", name = "a" } 118 | ] 119 | |> Encode.encode 120 | { encoder = 121 | Encode.withFieldNames 122 | (\{ id, name } -> 123 | List.sortBy Tuple.second 124 | [ ( "ID", id ) 125 | , ( "Name", name ) 126 | ] 127 | ) 128 | , fieldSeparator = ',' 129 | } 130 | |> Expect.equal "ID,Name\u{000D}\na,z\u{000D}\nz,a" 131 | , test "uses the correct separator" <| 132 | \_ -> 133 | pets 134 | |> Encode.encode 135 | { encoder = 136 | Encode.withFieldNames 137 | (\{ id, name, species } -> 138 | [ ( "ID", String.fromInt id ) 139 | , ( "Name", name ) 140 | , ( "Species", species ) 141 | ] 142 | ) 143 | , fieldSeparator = ';' 144 | } 145 | |> Expect.equal "ID;Name;Species\u{000D}\n1;Atlas;cat\u{000D}\n2;Axel;puffin" 146 | ] 147 | ] 148 | 149 | 150 | roundTripTest : Test 151 | roundTripTest = 152 | fuzz2 weirdCsvFuzzer fieldSeparatorFuzzer "anything we encode, we can parse" <| 153 | \weirdCsv fieldSeparator -> 154 | weirdCsv 155 | |> Encode.encode 156 | { encoder = Encode.withoutFieldNames identity 157 | , fieldSeparator = fieldSeparator 158 | } 159 | |> Parser.parse { fieldSeparator = fieldSeparator } 160 | |> Expect.equal (Ok weirdCsv) 161 | 162 | 163 | weirdCsvFuzzer : Fuzzer (List (List String)) 164 | weirdCsvFuzzer = 165 | Fuzz.oneOf 166 | [ Fuzz.constant "a" 167 | , Fuzz.constant "b " 168 | , Fuzz.constant "\"" 169 | , Fuzz.constant "," 170 | , Fuzz.constant ";" 171 | , Fuzz.constant "\n" 172 | , Fuzz.constant "\u{000D}" 173 | ] 174 | |> nonEmptyList 175 | |> Fuzz.map String.concat 176 | |> nonEmptyList 177 | |> shortList 178 | 179 | 180 | fieldSeparatorFuzzer : Fuzzer Char 181 | fieldSeparatorFuzzer = 182 | Fuzz.oneOf 183 | [ Fuzz.constant ',' 184 | , Fuzz.constant ';' 185 | , Fuzz.constant '\t' 186 | ] 187 | 188 | 189 | nonEmptyList : Fuzzer a -> Fuzzer (List a) 190 | nonEmptyList fuzzer = 191 | Fuzz.map2 (::) fuzzer (shortList fuzzer) 192 | 193 | 194 | shortList : Fuzzer a -> Fuzzer (List a) 195 | shortList fuzzer = 196 | Fuzz.oneOf 197 | [ Fuzz.constant [] 198 | , Fuzz.map List.singleton fuzzer 199 | , Fuzz.map2 (\a b -> [ a, b ]) fuzzer fuzzer 200 | ] 201 | -------------------------------------------------------------------------------- /tests/Csv/ParserTest.elm: -------------------------------------------------------------------------------- 1 | module Csv.ParserTest exposing (..) 2 | 3 | import Csv.Parser as Parser exposing (parse) 4 | import Expect exposing (Expectation) 5 | import Test exposing (..) 
6 | 7 | 8 | parseTest : Test 9 | parseTest = 10 | let 11 | configurations : List ( String, { rowSeparator : String, fieldSeparator : Char } ) 12 | configurations = 13 | [ ( "CRLF CSV (US locale)" 14 | , { rowSeparator = "\u{000D}\n" 15 | , fieldSeparator = ',' 16 | } 17 | ) 18 | , ( "CRLF CSV (EU locale, semicolon)" 19 | , { rowSeparator = "\u{000D}\n" 20 | , fieldSeparator = ';' 21 | } 22 | ) 23 | , ( "LF-only CSV" 24 | , { rowSeparator = "\n" 25 | , fieldSeparator = ',' 26 | } 27 | ) 28 | , ( "CRLF TSV" 29 | , { rowSeparator = "\u{000D}\n" 30 | , fieldSeparator = '\t' 31 | } 32 | ) 33 | , ( "LF-only TSV" 34 | , { rowSeparator = "\n" 35 | , fieldSeparator = '\t' 36 | } 37 | ) 38 | ] 39 | in 40 | configurations 41 | |> List.map 42 | (\( description, config ) -> 43 | describe description 44 | [ test "a single value" <| 45 | \_ -> 46 | expectRoundTrip config 47 | [ [ "a" ] ] 48 | , test "two fields" <| 49 | \_ -> 50 | expectRoundTrip config 51 | [ [ "a", "b" ] ] 52 | , test "two rows" <| 53 | \_ -> 54 | expectRoundTrip config 55 | [ [ "a" ] 56 | , [ "b" ] 57 | ] 58 | , test "two rows of two fields" <| 59 | \_ -> 60 | expectRoundTrip config 61 | [ [ "a", "b" ] 62 | , [ "c", "d" ] 63 | ] 64 | , test "blank fields" <| 65 | \_ -> 66 | expectRoundTrip config 67 | [ [ "", "", "" ] ] 68 | , test "only half of a row separator" <| 69 | \_ -> 70 | case String.uncons config.rowSeparator of 71 | Nothing -> 72 | -- really shouldn't ever happen but 73 | -- we'll let it slide here since it's 74 | -- caught in other places. 75 | Expect.pass 76 | 77 | Just ( _, "" ) -> 78 | -- not relevant here 79 | Expect.pass 80 | 81 | Just ( first, _ ) -> 82 | String.fromList [ first ] 83 | |> parse { fieldSeparator = config.fieldSeparator } 84 | |> Expect.equal (Ok [ [ String.fromList [ first ] ] ]) 85 | , describe "quoted values" 86 | [ test "quoted single values" <| 87 | \_ -> 88 | "\"a\"" 89 | |> parse { fieldSeparator = config.fieldSeparator } 90 | |> Expect.equal (Ok [ [ "a" ] ]) 91 | , test "quoted row separators" <| 92 | \_ -> 93 | ("\"" ++ config.rowSeparator ++ "\"") 94 | |> parse { fieldSeparator = config.fieldSeparator } 95 | |> Expect.equal (Ok [ [ config.rowSeparator ] ]) 96 | , test "quoted field separators" <| 97 | \_ -> 98 | ("\"" ++ String.fromChar config.fieldSeparator ++ "\"") 99 | |> parse { fieldSeparator = config.fieldSeparator } 100 | |> Expect.equal (Ok [ [ String.fromChar config.fieldSeparator ] ]) 101 | , test "quoted quotes" <| 102 | \_ -> 103 | "\"\"\"\"" 104 | |> parse { fieldSeparator = config.fieldSeparator } 105 | |> Expect.equal (Ok [ [ "\"" ] ]) 106 | , test "two quoted values in a row" <| 107 | \_ -> 108 | ("\"a\"" ++ String.fromChar config.fieldSeparator ++ "\"b\"") 109 | |> parse { fieldSeparator = config.fieldSeparator } 110 | |> Expect.equal (Ok [ [ "a", "b" ] ]) 111 | , test "two rows with quoted values" <| 112 | \_ -> 113 | ("\"a\"" ++ config.rowSeparator ++ "\"b\"") 114 | |> parse { fieldSeparator = config.fieldSeparator } 115 | |> Expect.equal (Ok [ [ "a" ], [ "b" ] ]) 116 | , test "a trailing newline should be ignored" <| 117 | -- https://github.com/BrianHicks/elm-csv/issues/8 118 | \_ -> 119 | (encode config 120 | [ [ "Country", "Population" ] 121 | , [ "Argentina", "44361150" ] 122 | , [ "Brazil", "212652000" ] 123 | ] 124 | ++ config.rowSeparator 125 | ) 126 | |> parse { fieldSeparator = config.fieldSeparator } 127 | |> Expect.equal 128 | (Ok 129 | [ [ "Country", "Population" ] 130 | , [ "Argentina", "44361150" ] 131 | , [ "Brazil", "212652000" ] 132 | ] 133 | ) 134 | , test 
"a trailing newline after a quoted field should be ignored" <| 135 | -- https://github.com/BrianHicks/elm-csv/issues/24 136 | \_ -> 137 | ("\"val\"" ++ config.rowSeparator) 138 | |> parse { fieldSeparator = config.fieldSeparator } 139 | |> Expect.equal (Ok [ [ "val" ] ]) 140 | , describe "errors" 141 | [ test "not ending a quoted value is an error" <| 142 | \_ -> 143 | "\"a" 144 | |> parse { fieldSeparator = config.fieldSeparator } 145 | |> Expect.equal (Err (Parser.SourceEndedWithoutClosingQuote 1)) 146 | , test "additional characters after the closing quote but before the field separator is an error" <| 147 | \_ -> 148 | ("\"a\"b" ++ config.rowSeparator) 149 | |> parse { fieldSeparator = config.fieldSeparator } 150 | |> Expect.equal (Err (Parser.AdditionalCharactersAfterClosingQuote 1)) 151 | , test "additional characters after the closing quote but before the row separator is an error" <| 152 | \_ -> 153 | ("\"a\"b" ++ String.fromChar config.fieldSeparator) 154 | |> parse { fieldSeparator = config.fieldSeparator } 155 | |> Expect.equal (Err (Parser.AdditionalCharactersAfterClosingQuote 1)) 156 | ] 157 | ] 158 | ] 159 | ) 160 | |> describe "parse" 161 | 162 | 163 | expectRoundTrip : { rowSeparator : String, fieldSeparator : Char } -> List (List String) -> Expectation 164 | expectRoundTrip config rows = 165 | encode config rows 166 | |> parse { fieldSeparator = config.fieldSeparator } 167 | |> Expect.equal (Ok rows) 168 | 169 | 170 | encode : { rowSeparator : String, fieldSeparator : Char } -> List (List String) -> String 171 | encode config rows = 172 | rows 173 | |> List.map (String.join (String.fromChar config.fieldSeparator)) 174 | |> String.join config.rowSeparator 175 | -------------------------------------------------------------------------------- /tests/elm-verify-examples.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": "../src", 3 | "tests": [ 4 | "Csv.Decode", 5 | "Csv.Encode", 6 | "Csv.Parser", 7 | "README.md" 8 | ] 9 | } 10 | --------------------------------------------------------------------------------