├── .ghci ├── .github └── workflows │ ├── cabal.yml │ └── stack.yml ├── .gitignore ├── LICENSE ├── README.md ├── Setup.hs ├── changelog.md ├── csv-conduit.cabal ├── src └── Data │ └── CSV │ ├── Conduit.hs │ └── Conduit │ ├── Conversion.hs │ ├── Conversion │ └── Internal.hs │ ├── Monoid.hs │ ├── Parser │ ├── ByteString.hs │ └── Text.hs │ └── Types.hs ├── stack.yaml └── test ├── AdHoc.hs ├── Test.hs ├── test-mac-excel.csv ├── test-windows-excel.csv ├── test.csv └── test.xls /.ghci: -------------------------------------------------------------------------------- 1 | :set -isrc 2 | -------------------------------------------------------------------------------- /.github/workflows/cabal.yml: -------------------------------------------------------------------------------- 1 | # This GitHub workflow config has been generated by a script via 2 | # 3 | # haskell-ci 'github' 'csv-conduit.cabal' 4 | # 5 | # To regenerate the script (for example after adjusting tested-with) run 6 | # 7 | # haskell-ci regenerate 8 | # 9 | # For more information, see https://github.com/haskell-CI/haskell-ci 10 | # 11 | # version: 0.19.20240608 12 | # 13 | # REGENDATA ("0.19.20240608",["github","csv-conduit.cabal"]) 14 | # 15 | name: cabal 16 | on: 17 | - push 18 | - pull_request 19 | jobs: 20 | linux: 21 | name: ${{ matrix.compiler }} 22 | runs-on: ubuntu-20.04 23 | timeout-minutes: 24 | 60 25 | container: 26 | image: buildpack-deps:jammy 27 | continue-on-error: ${{ matrix.allow-failure }} 28 | strategy: 29 | matrix: 30 | include: 31 | - compiler: ghc-9.6.4 32 | compilerKind: ghc 33 | compilerVersion: 9.6.4 34 | setup-method: ghcup 35 | allow-failure: false 36 | - compiler: ghc-9.0.1 37 | compilerKind: ghc 38 | compilerVersion: 9.0.1 39 | setup-method: ghcup 40 | allow-failure: false 41 | - compiler: ghc-8.10.4 42 | compilerKind: ghc 43 | compilerVersion: 8.10.4 44 | setup-method: ghcup 45 | allow-failure: false 46 | - compiler: ghc-8.8.4 47 | compilerKind: ghc 48 | compilerVersion: 8.8.4 49 | 
setup-method: ghcup 50 | allow-failure: false 51 | - compiler: ghc-8.8.3 52 | compilerKind: ghc 53 | compilerVersion: 8.8.3 54 | setup-method: ghcup 55 | allow-failure: false 56 | - compiler: ghc-8.6.5 57 | compilerKind: ghc 58 | compilerVersion: 8.6.5 59 | setup-method: ghcup 60 | allow-failure: false 61 | - compiler: ghc-8.4.4 62 | compilerKind: ghc 63 | compilerVersion: 8.4.4 64 | setup-method: ghcup 65 | allow-failure: false 66 | - compiler: ghc-8.2.2 67 | compilerKind: ghc 68 | compilerVersion: 8.2.2 69 | setup-method: ghcup 70 | allow-failure: false 71 | fail-fast: false 72 | steps: 73 | - name: apt 74 | run: | 75 | apt-get update 76 | apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 libnuma-dev 77 | mkdir -p "$HOME/.ghcup/bin" 78 | curl -sL https://downloads.haskell.org/ghcup/0.1.20.0/x86_64-linux-ghcup-0.1.20.0 > "$HOME/.ghcup/bin/ghcup" 79 | chmod a+x "$HOME/.ghcup/bin/ghcup" 80 | "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) 81 | "$HOME/.ghcup/bin/ghcup" install cabal 3.10.2.0 || (cat "$HOME"/.ghcup/logs/*.* && false) 82 | env: 83 | HCKIND: ${{ matrix.compilerKind }} 84 | HCNAME: ${{ matrix.compiler }} 85 | HCVER: ${{ matrix.compilerVersion }} 86 | - name: Set PATH and environment variables 87 | run: | 88 | echo "$HOME/.cabal/bin" >> $GITHUB_PATH 89 | echo "LANG=C.UTF-8" >> "$GITHUB_ENV" 90 | echo "CABAL_DIR=$HOME/.cabal" >> "$GITHUB_ENV" 91 | echo "CABAL_CONFIG=$HOME/.cabal/config" >> "$GITHUB_ENV" 92 | HCDIR=/opt/$HCKIND/$HCVER 93 | HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER") 94 | HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#') 95 | HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#') 96 | echo "HC=$HC" >> "$GITHUB_ENV" 97 | echo "HCPKG=$HCPKG" >> "$GITHUB_ENV" 98 | echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV" 99 | echo "CABAL=$HOME/.ghcup/bin/cabal-3.10.2.0 -vnormal+nowrap" >> "$GITHUB_ENV" 100 | HCNUMVER=$(${HC} --numeric-version|perl -ne 
'/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))') 101 | echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV" 102 | echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV" 103 | echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV" 104 | echo "HEADHACKAGE=false" >> "$GITHUB_ENV" 105 | echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV" 106 | echo "GHCJSARITH=0" >> "$GITHUB_ENV" 107 | env: 108 | HCKIND: ${{ matrix.compilerKind }} 109 | HCNAME: ${{ matrix.compiler }} 110 | HCVER: ${{ matrix.compilerVersion }} 111 | - name: env 112 | run: | 113 | env 114 | - name: write cabal config 115 | run: | 116 | mkdir -p $CABAL_DIR 117 | cat >> $CABAL_CONFIG <> $CABAL_CONFIG < cabal-plan.xz 150 | echo 'f62ccb2971567a5f638f2005ad3173dba14693a45154c1508645c52289714cb2 cabal-plan.xz' | sha256sum -c - 151 | xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan 152 | rm -f cabal-plan.xz 153 | chmod a+x $HOME/.cabal/bin/cabal-plan 154 | cabal-plan --version 155 | - name: checkout 156 | uses: actions/checkout@v4 157 | with: 158 | path: source 159 | - name: initial cabal.project for sdist 160 | run: | 161 | touch cabal.project 162 | echo "packages: $GITHUB_WORKSPACE/source/." 
>> cabal.project 163 | cat cabal.project 164 | - name: sdist 165 | run: | 166 | mkdir -p sdist 167 | $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist 168 | - name: unpack 169 | run: | 170 | mkdir -p unpacked 171 | find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \; 172 | - name: generate cabal.project 173 | run: | 174 | PKGDIR_csv_conduit="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/csv-conduit-[0-9.]*')" 175 | echo "PKGDIR_csv_conduit=${PKGDIR_csv_conduit}" >> "$GITHUB_ENV" 176 | rm -f cabal.project cabal.project.local 177 | touch cabal.project 178 | touch cabal.project.local 179 | echo "packages: ${PKGDIR_csv_conduit}" >> cabal.project 180 | echo "package csv-conduit" >> cabal.project 181 | echo " ghc-options: -Werror=missing-methods" >> cabal.project 182 | cat >> cabal.project <> cabal.project.local 185 | cat cabal.project 186 | cat cabal.project.local 187 | - name: dump install plan 188 | run: | 189 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all 190 | cabal-plan 191 | - name: restore cache 192 | uses: actions/cache/restore@v4 193 | with: 194 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 195 | path: ~/.cabal/store 196 | restore-keys: ${{ runner.os }}-${{ matrix.compiler }}- 197 | - name: install dependencies 198 | run: | 199 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all 200 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all 201 | - name: build w/o tests 202 | run: | 203 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 204 | - name: build 205 | run: | 206 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always 207 | - name: tests 208 | run: | 209 | $CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct 210 | - name: cabal check 211 | run: | 212 | cd ${PKGDIR_csv_conduit} || false 
213 | ${CABAL} -vnormal check 214 | - name: haddock 215 | run: | 216 | $CABAL v2-haddock --disable-documentation --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all 217 | - name: unconstrained build 218 | run: | 219 | rm -f cabal.project.local 220 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 221 | - name: save cache 222 | uses: actions/cache/save@v4 223 | if: always() 224 | with: 225 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 226 | path: ~/.cabal/store 227 | -------------------------------------------------------------------------------- /.github/workflows/stack.yml: -------------------------------------------------------------------------------- 1 | name: stack 2 | on: 3 | - push 4 | - pull_request 5 | 6 | jobs: 7 | linux: 8 | name: ${{ matrix.resolver }} 9 | runs-on: ubuntu-latest 10 | timeout-minutes: 60 11 | continue-on-error: false 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | resolver: 16 | - "lts-22.34" 17 | - "lts-21.25" 18 | - "lts-20.26" 19 | - "lts-19.33" 20 | - "lts-18.28" 21 | steps: 22 | - name: Clone project 23 | uses: actions/checkout@v4 24 | 25 | - name: Cache dependencies 26 | uses: actions/cache@v4 27 | with: 28 | path: ~/.stack 29 | key: ${{ runner.os }}-${{ matrix.resolver }}-${{ hashFiles('stack.yaml') }} 30 | restore-keys: ${{ runner.os }}-${{ matrix.resolver }} 31 | 32 | - name: Build and run tests 33 | shell: bash 34 | run: | 35 | set -ex 36 | stack --version 37 | stack test --fast --no-terminal --resolver ${{ matrix.resolver }} 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.hi 3 | dist/ 4 | cabal-dev/ 5 | .DS* 6 | TAGS 7 | *.prof 8 | *.hi 9 | *.o 10 | *.ps 11 | *.aux 12 | *.hp 13 | .cabal-sandbox 14 | cabal.sandbox.config 15 | .stack-work/ 16 | stack.yaml.lock 17 | dist-newstyle 18 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c)2024, Daniel Vianna 2 | Copyright (c)2013, Ozgun Ataman 3 | Copyright (c)2012, Johan Tibell 4 | 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above 14 | copyright notice, this list of conditions and the following 15 | disclaimer in the documentation and/or other materials provided 16 | with the distribution. 17 | 18 | * Neither the name of Ozgun Ataman nor the names of other 19 | contributors may be used to endorse or promote products derived 20 | from this software without specific prior written permission. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # README 2 | [![cabal 3 | build](https://github.com/ozataman/csv-conduit/actions/workflows/cabal.yml/badge.svg)](https://github.com/ozataman/csv-conduit/actions) 4 | [![stack build](https://github.com/ozataman/csv-conduit/actions/workflows/stack.yml/badge.svg)](https://github.com/ozataman/csv-conduit/actions) 5 | 6 | ## CSV Files and Haskell 7 | 8 | CSV files are the de-facto standard in many cases of data transfer, 9 | particularly when dealing with enterprise application or disparate database 10 | systems. 11 | 12 | While there are a number of csv libraries in Haskell, at the time of 13 | this project's start, there wasn't one that provided all of the 14 | following: 15 | 16 | * Full flexibility in quote characters, separators, input/output 17 | * Constant space operation 18 | * Robust parsing and error resiliency 19 | * Battle-tested reliability in real-world datasets 20 | * Fast operation 21 | * Convenient interface that supports a variety of use cases 22 | 23 | Over time, people created other plausible CSV packages like cassava. 24 | The major benefit from this library remains to be: 25 | 26 | * Direct participation in the conduit ecosystem, which is now quite 27 | large, and all the benefits that come with it. 28 | * Flexibility in CSV format definition. 29 | * Resiliency to errors in the input data. 30 | 31 | 32 | ## This package 33 | 34 | csv-conduit is a conduit-based CSV parsing library that is easy to 35 | use, flexible and fast. It leverages the conduit infrastructure to 36 | provide constant-space operation, which is quite critical in many real 37 | world use cases. 
38 | 39 | For example, you can use http-conduit to download a CSV file from the 40 | internet and plug its Source into intoCSV to stream-convert the 41 | download into the Row data type and do something with it as the data 42 | streams, that is without having to download the entire file to disk 43 | first. 44 | 45 | 46 | ## Author & Contributors 47 | 48 | - Ozgun Ataman (@ozataman) 49 | - Daniel Bergey (@bergey) 50 | - BJTerry (@BJTerry) 51 | - Mike Craig (@mkscrg) 52 | - Daniel Corson (@dancor) 53 | - Dmitry Dzhus (@dzhus) 54 | - Niklas Hambüchen (@nh2) 55 | - Facundo Domínguez (@facundominguez) 56 | - Daniel Vianna (@dmvianna) 57 | 58 | ### Introduction 59 | 60 | * The CSVeable typeclass implements the key operations. 61 | * CSVeable is parameterized on both a stream type and a target CSV row type. 62 | * There are 2 basic row types and they implement *exactly* the same operations, 63 | so you can chose the right one for the job at hand: 64 | - `type MapRow t = Map t t` 65 | - `type Row t = [t]` 66 | * You basically use the Conduits defined in this library to do the 67 | parsing from a CSV stream and rendering back into a CSV stream. 68 | * Use the full flexibility and modularity of conduits for sources and sinks. 69 | 70 | ### Speed 71 | 72 | While fast operation is of concern, I have so far cared more about correct 73 | operation and a flexible API. Please let me know if you notice any performance 74 | regressions or optimization opportunities. 
75 | 76 | 77 | ### Usage Examples 78 | 79 | 80 | #### Example #1: Basics Using Convenience API 81 | 82 | ```haskell 83 | {-# LANGUAGE OverloadedStrings #-} 84 | 85 | import Data.Conduit 86 | import Data.Conduit.Binary 87 | import Data.Conduit.List as CL 88 | import Data.CSV.Conduit 89 | import Data.Text (Text) 90 | 91 | -- Just reverse te columns 92 | myProcessor :: Monad m => Conduit (Row Text) m (Row Text) 93 | myProcessor = CL.map reverse 94 | 95 | test :: IO () 96 | test = runResourceT $ 97 | transformCSV defCSVSettings 98 | (sourceFile "input.csv") 99 | myProcessor 100 | (sinkFile "output.csv") 101 | ``` 102 | 103 | #### Example #2: Basics Using Conduit API 104 | 105 | ```haskell 106 | {-# LANGUAGE OverloadedStrings #-} 107 | 108 | import Data.Conduit 109 | import Data.Conduit.Binary 110 | import Data.CSV.Conduit 111 | import Data.Text (Text) 112 | 113 | myProcessor :: Monad m => Conduit (Row Text) m (Row Text) 114 | myProcessor = awaitForever $ yield 115 | 116 | -- Let's simply stream from a file, parse the CSV, reserialize it 117 | -- and push back into another file. 
118 | test :: IO () 119 | test = runResourceT $ 120 | sourceFile "test/BigFile.csv" $= 121 | intoCSV defCSVSettings $= 122 | myProcessor $= 123 | fromCSV defCSVSettings $$ 124 | sinkFile "test/BigFileOut.csv" 125 | ``` 126 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | 1.0.1.1 2 | * Fix test suite to build with text 2.1.2 and ghc 9.12.2, resolving [#62](https://github.com/ozataman/csv-conduit/issues/62) 3 | 4 | 1.0.1.0 5 | * Use ConduitT instead of ConduitM (prettier type inference with newer conduit imports) 6 | 7 | 1.0.0.2 8 | * Fixed [#17](https://github.com/ozataman/csv-conduit/issues/17), 9 | where CSV created with Excel in Mac OS failed to parse due to its 10 | newline characters. 11 | 12 | 1.0.0.1 13 | * Removed dependencies: mmorph, monad-control, mtl, 14 | unordered-containers, primitive 15 | 16 | 1.0.0.0 17 | * Removed `return` from the `Monad` instance for `Parser`, and 18 | transfered its definition to `pure` in the `Applicative` instance of 19 | `Parser`. This was necessary to support GHC 9.6.4. 20 | * Create new API to choose whether to handle empty CSV cells as empty 21 | strings or NULLs. 22 | * Added imports that were removed from `Prelude` in GHC 9.6.4. 23 | * Bumped the default Stack resolver to LTS-22.20. 24 | 25 | 0.7.3.0 26 | * Add ordered versions of named records for consistent, controllable header column ordering. [PR 44](https://github.com/ozataman/csv-conduit/pull/44) 27 | * Add support for GHC 9.0.1 28 | 29 | 0.7.2.0 30 | * Remove some dependency upper bounds for forward compatibility. 31 | 32 | 0.7.1.0 33 | * Add MonadFail instance for Parser. 
[PR 38](https://github.com/ozataman/csv-conduit/pull/38) 34 | 35 | 0.7.0.0 36 | * BREAKING: Switch from partial Monoid instance on Parser to total Semigroup instance. 37 | * Compatibility with GHC 8.4.x/base-4.11.1.0 38 | 39 | 0.6.8.1 40 | * Fix documentation mistake in FromNamedRecord/ToNamedRecord examples. 41 | 42 | 0.6.8 43 | * Haddocks improvements 44 | * Fix inlining and specialization rules around formatDecimal 45 | * Updates to permit newest conduit/resourcet packages 46 | 47 | 0.6.7 48 | * Fix build for GHC 8.0.1 49 | -------------------------------------------------------------------------------- /csv-conduit.cabal: -------------------------------------------------------------------------------- 1 | name: csv-conduit 2 | version: 1.0.1.1 3 | synopsis: 4 | A flexible, fast, conduit-based CSV parser library for Haskell. 5 | 6 | homepage: http://github.com/ozataman/csv-conduit 7 | license: BSD3 8 | license-file: LICENSE 9 | author: Ozgun Ataman 10 | maintainer: Daniel Vianna 11 | category: Data, Conduit, CSV, Text 12 | build-type: Simple 13 | cabal-version: >=1.10 14 | tested-with: 15 | GHC ==8.2.2 16 | || ==8.4.4 17 | || ==8.6.5 18 | || ==8.8.3 19 | || ==8.8.4 20 | || ==8.10.4 21 | || ==9.0.1 22 | || ==9.6.4 23 | 24 | description: 25 | CSV files are the de-facto standard in many situations involving data transfer, 26 | particularly when dealing with enterprise application or disparate database 27 | systems. 28 | . 29 | While there are a number of CSV libraries in Haskell, at the time of this 30 | project's start in 2010, there wasn't one that provided all of the following: 31 | . 32 | * Full flexibility in quote characters, separators, input/output 33 | . 34 | * Constant space operation 35 | . 36 | * Robust parsing, correctness and error resiliency 37 | . 38 | * Convenient interface that supports a variety of use cases 39 | . 40 | * Fast operation 41 | . 42 | This library is an attempt to close these gaps. 
Please note that 43 | this library started its life based on the enumerator package and 44 | has recently been ported to work with conduits instead. In the 45 | process, it has been greatly simplified thanks to the modular nature 46 | of the conduits library. 47 | . 48 | Following the port to conduits, the library has also gained the 49 | ability to parameterize on the stream type and work both with 50 | ByteString and Text. 51 | . 52 | For more documentation and examples, check out the README at: 53 | . 54 | 55 | . 56 | 57 | extra-source-files: 58 | changelog.md 59 | README.md 60 | test/test-mac-excel.csv 61 | test/test-windows-excel.csv 62 | test/test.csv 63 | test/Test.hs 64 | test/test.xls 65 | 66 | flag lib-Werror 67 | default: False 68 | manual: True 69 | 70 | library 71 | default-language: Haskell2010 72 | exposed-modules: 73 | Data.CSV.Conduit 74 | Data.CSV.Conduit.Conversion 75 | Data.CSV.Conduit.Parser.ByteString 76 | Data.CSV.Conduit.Parser.Text 77 | Data.CSV.Conduit.Types 78 | 79 | other-modules: 80 | Data.CSV.Conduit.Conversion.Internal 81 | Data.CSV.Conduit.Monoid 82 | 83 | ghc-options: -Wall -funbox-strict-fields 84 | 85 | if flag(lib-werror) 86 | ghc-options: -Werror 87 | 88 | hs-source-dirs: src 89 | build-depends: 90 | array 91 | , attoparsec >=0.10 92 | , base >=4 && <5 93 | , blaze-builder 94 | , bytestring 95 | , conduit >=1.3.0 && <2.0 96 | , conduit-extra 97 | , containers >=0.3 98 | , data-default 99 | , exceptions >=0.3 100 | , ordered-containers 101 | , primitive 102 | , resourcet >=1.1.2.1 103 | , text 104 | , transformers 105 | , vector 106 | 107 | test-suite test 108 | default-language: Haskell2010 109 | type: exitcode-stdio-1.0 110 | main-is: Test.hs 111 | ghc-options: -Wall -Wunused-packages 112 | 113 | if flag(lib-werror) 114 | ghc-options: -Werror 115 | 116 | hs-source-dirs: test 117 | build-depends: 118 | base >=4 && <5 119 | , bytestring 120 | , conduit >=1.3.0 121 | , containers >=0.3 122 | , csv-conduit 123 | , directory 124 | , 
HUnit >=1.2 125 | , ordered-containers 126 | , test-framework 127 | , test-framework-hunit 128 | , text 129 | , vector 130 | 131 | source-repository head 132 | type: git 133 | location: git://github.com/ozataman/csv-conduit.git 134 | -------------------------------------------------------------------------------- /src/Data/CSV/Conduit.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE CPP #-} 3 | {-# LANGUAGE FlexibleContexts #-} 4 | {-# LANGUAGE FlexibleInstances #-} 5 | {-# LANGUAGE MultiParamTypeClasses #-} 6 | {-# LANGUAGE OverloadedStrings #-} 7 | {-# LANGUAGE RankNTypes #-} 8 | {-# LANGUAGE ScopedTypeVariables #-} 9 | {-# LANGUAGE UndecidableInstances #-} 10 | 11 | module Data.CSV.Conduit 12 | ( 13 | 14 | -- * Main Interface 15 | decodeCSV 16 | , readCSVFile 17 | , writeCSVFile 18 | , transformCSV 19 | , transformCSV' 20 | , mapCSVFile 21 | , writeHeaders 22 | , writeHeadersOrdered 23 | 24 | -- Types 25 | , CSV (..) 26 | , CSVSettings (..) 
27 | , defCSVSettings 28 | , MapRow 29 | , OrderedMapRow 30 | , Row 31 | 32 | -- * Re-exported For Convenience 33 | , runResourceT 34 | ) where 35 | 36 | ------------------------------------------------------------------------------- 37 | import Control.Exception 38 | import Control.Monad.Catch.Pure (runCatchT, CatchT) 39 | import Control.Monad.IO.Class (MonadIO (liftIO)) 40 | import Control.Monad.Primitive 41 | import Control.Monad.ST 42 | import Control.Monad.Trans.Class (MonadTrans(lift)) 43 | import Control.Monad.Trans.Except (ExceptT(..), runExceptT) 44 | import Control.Monad.Trans.Resource (MonadResource, MonadThrow, 45 | runResourceT) 46 | import Data.Attoparsec.Types (Parser) 47 | import qualified Data.ByteString as B 48 | import Data.ByteString.Char8 (ByteString) 49 | import qualified Data.ByteString.Char8 as B8 50 | import Data.ByteString.Internal (c2w) 51 | import Data.Conduit 52 | import Data.Conduit.Attoparsec 53 | import Data.Conduit.Binary (sinkFile, sinkIOHandle, 54 | sourceFile) 55 | import qualified Data.Conduit.List as C 56 | import qualified Data.Map as M 57 | import qualified Data.Map.Ordered as MO 58 | import Data.String 59 | import Data.Text (Text) 60 | import qualified Data.Text as T 61 | import qualified Data.Text.Encoding as T 62 | import qualified Data.Vector as V 63 | import qualified Data.Vector.Generic as GV 64 | import qualified Data.Vector.Generic.Mutable as GMV 65 | import Data.Void as Void 66 | import System.IO 67 | ------------------------------------------------------------------------------- 68 | import Data.CSV.Conduit.Conversion (FromNamedRecord (..), 69 | FromNamedRecordOrdered (..), 70 | Named (..), 71 | NamedOrdered (..), 72 | ToNamedRecord (..), 73 | ToNamedRecordOrdered (..), 74 | runParser) 75 | import qualified Data.CSV.Conduit.Parser.ByteString as BSP 76 | import qualified Data.CSV.Conduit.Parser.Text as TP 77 | import Data.CSV.Conduit.Types 78 | 
------------------------------------------------------------------------------- 79 | 80 | 81 | ------------------------------------------------------------------------------- 82 | -- | Represents types 'r' that are CSV-like and can be converted 83 | -- to/from an underlying stream of type 's'. There is nothing scary 84 | -- about the type: 85 | -- 86 | -- @s@ represents stream types that can be converted to\/from CSV rows. 87 | -- Examples are 'ByteString', 'Text' and 'String'. 88 | -- 89 | -- @r@ represents the target CSV row representations that this library 90 | -- can work with. Examples are the 'Row' types, the 'Record' type and 91 | -- the 'MapRow' family of types. We can also convert directly to 92 | -- complex Haskell types using the 'Data.CSV.Conduit.Conversion' 93 | -- module that was borrowed from the cassava package, which was itself 94 | -- inspired by the aeson package. 95 | -- 96 | -- 97 | -- Example #1: Basics Using Convenience API 98 | -- 99 | -- >import Data.Conduit 100 | -- >import Data.Conduit.Binary 101 | -- >import Data.Conduit.List as CL 102 | -- >import Data.CSV.Conduit 103 | -- > 104 | -- >myProcessor :: Conduit (Row Text) m (Row Text) 105 | -- >myProcessor = CL.map reverse 106 | -- > 107 | -- >test = runResourceT $ 108 | -- > transformCSV defCSVSettings 109 | -- > (sourceFile "input.csv") 110 | -- > myProcessor 111 | -- > (sinkFile "output.csv") 112 | -- 113 | -- 114 | -- Example #2: Basics Using Conduit API 115 | -- 116 | -- >import Data.Conduit 117 | -- >import Data.Conduit.Binary 118 | -- >import Data.CSV.Conduit 119 | -- > 120 | -- >myProcessor :: Conduit (MapRow Text) m (MapRow Text) 121 | -- >myProcessor = undefined 122 | -- > 123 | -- >test = runResourceT $ runConduit $ 124 | -- > sourceFile "test/BigFile.csv" .| 125 | -- > intoCSV defCSVSettings .| 126 | -- > myProcessor .| 127 | -- > (writeHeaders defCSVSettings >> fromCSV defCSVSettings) .| 128 | -- > sinkFile "test/BigFileOut.csv" 129 | class CSV s r where 130 | 131 | 
----------------------------------------------------------------------------- 132 | -- | Convert a CSV row into strict ByteString equivalent. 133 | rowToStr :: CSVSettings -> r -> s 134 | 135 | ----------------------------------------------------------------------------- 136 | -- | Turn a stream of 's' into a stream of CSV row type. An example 137 | -- would be parsing a ByteString stream as rows of 'MapRow' 'Text'. 138 | intoCSV :: (MonadThrow m) => CSVSettings -> ConduitT s r m () 139 | 140 | ----------------------------------------------------------------------------- 141 | -- | Turn a stream of CSV row type back into a stream of 's'. An 142 | -- example would be rendering a stream of 'Row' 'ByteString' rows as 143 | -- 'Text'. 144 | fromCSV :: Monad m => CSVSettings -> ConduitT r s m () 145 | 146 | 147 | 148 | 149 | 150 | ------------------------------------------------------------------------------ 151 | -- | 'Row' instance using 'ByteString' 152 | instance CSV ByteString (Row ByteString) where 153 | rowToStr s !r = 154 | let 155 | sep = B.pack [c2w (csvSep s)] 156 | wrapField !f = case csvQuoteCharAndStyle s of 157 | Just (x, quoteEmpty) -> 158 | case quoteEmpty == DoQuoteEmpty || B8.length f /= 0 of 159 | True -> (x `B8.cons` escape x f) `B8.snoc` x 160 | False -> f 161 | Nothing -> f 162 | escape c str = B8.intercalate (B8.pack [c,c]) $ B8.split c str 163 | in B.intercalate sep . 
map wrapField $ r 164 | 165 | intoCSV set = intoCSVRow (BSP.row set) 166 | fromCSV set = fromCSVRow set 167 | 168 | 169 | ------------------------------------------------------------------------------ 170 | -- | 'Row' instance using 'Text' 171 | instance CSV Text (Row Text) where 172 | rowToStr s !r = 173 | let 174 | sep = T.pack [csvSep s] 175 | wrapField !f = case csvQuoteCharAndStyle s of 176 | Just (x, quoteEmpty) -> case quoteEmpty == DoQuoteEmpty || not (T.null f) of 177 | True -> x `T.cons` escape x f `T.snoc` x 178 | False -> f 179 | Nothing -> f 180 | escape c str = T.intercalate (T.pack [c,c]) $ T.split (== c) str 181 | in T.intercalate sep . map wrapField $ r 182 | 183 | intoCSV set = intoCSVRow (TP.row set) 184 | fromCSV set = fromCSVRow set 185 | 186 | 187 | ------------------------------------------------------------------------------- 188 | -- | 'Row' instance using 'Text' based on 'ByteString' stream 189 | instance CSV ByteString (Row Text) where 190 | rowToStr s r = T.encodeUtf8 $ rowToStr s r 191 | intoCSV set = intoCSV set .| C.map (map T.decodeUtf8) 192 | fromCSV set = fromCSV set .| C.map T.encodeUtf8 193 | 194 | 195 | 196 | ------------------------------------------------------------------------------- 197 | -- | 'Row' instance using 'String' based on 'ByteString' stream. 198 | -- Please note this uses the ByteString operations underneath and has 199 | -- lots of unnecessary overhead. Included for convenience. 200 | instance CSV ByteString (Row String) where 201 | rowToStr s r = rowToStr s $ map B8.pack r 202 | intoCSV set = intoCSV set .| C.map (map B8.unpack) 203 | fromCSV set = C.map (map B8.pack) .| fromCSV set 204 | 205 | 206 | -- | Support for parsing rows in the 'Vector' form. 207 | instance (CSV s (Row s)) => CSV s (V.Vector s) where 208 | rowToStr s r = rowToStr s . 
V.toList $ r 209 | intoCSV set = intoCSV set .| C.map (V.fromList) 210 | fromCSV set = C.map (V.toList) .| fromCSV set 211 | 212 | 213 | 214 | ------------------------------------------------------------------------------- 215 | fromCSVRow :: (Monad m, IsString s, CSV s r) 216 | => CSVSettings -> ConduitT r s m () 217 | fromCSVRow set = awaitForever $ \row -> mapM_ yield [rowToStr set row, "\n"] 218 | 219 | 220 | 221 | ------------------------------------------------------------------------------- 222 | intoCSVRow :: (MonadThrow m, AttoparsecInput i) => Parser i (Maybe o) -> ConduitT i o m () 223 | intoCSVRow p = parse .| puller 224 | where 225 | parse = {-# SCC "conduitParser_p" #-} conduitParser p 226 | puller = {-# SCC "puller" #-} 227 | awaitForever $ \ (_, mrow) -> maybe (return ()) yield mrow 228 | 229 | 230 | ------------------------------------------------------------------------------- 231 | -- | Generic 'MapRow' instance; any stream type with a 'Row' instance 232 | -- automatically gets a 'MapRow' instance. 233 | instance (CSV s (Row s'), Ord s', IsString s) => CSV s (MapRow s') where 234 | rowToStr s r = rowToStr s . M.elems $ r 235 | intoCSV set = intoCSVMap set 236 | fromCSV set = fromCSVMap set 237 | 238 | instance (CSV s (Row s'), Ord s', IsString s) => CSV s (OrderedMapRow s') where 239 | rowToStr s r = rowToStr s . (map snd . MO.assocs) $ r 240 | intoCSV set = intoCSVMapOrdered set 241 | fromCSV set = fromCSVMapOrdered set 242 | 243 | 244 | ------------------------------------------------------------------------------- 245 | intoCSVMap :: (Ord a, MonadThrow m, CSV s [a]) 246 | => CSVSettings -> ConduitT s (MapRow a) m () 247 | intoCSVMap set = intoCSV set .| (headers >>= converter) 248 | where 249 | headers = do 250 | mrow <- await 251 | case mrow of 252 | Nothing -> return [] 253 | Just [] -> headers 254 | Just hs -> return hs 255 | converter hs = awaitForever $ yield . 
toMapCSV hs 256 | toMapCSV !hs !fs = M.fromList $ zip hs fs 257 | 258 | intoCSVMapOrdered :: (Ord a, MonadThrow m, CSV s [a]) 259 | => CSVSettings -> ConduitM s (OrderedMapRow a) m () 260 | intoCSVMapOrdered set = intoCSV set .| (headers >>= converter) 261 | where 262 | headers = do 263 | mrow <- await 264 | case mrow of 265 | Nothing -> return [] 266 | Just [] -> headers 267 | Just hs -> return hs 268 | converter hs = awaitForever $ yield . toMapCSV hs 269 | toMapCSV !hs !fs = MO.fromList $ zip hs fs 270 | 271 | 272 | -- | Conversion of stream directly to/from a custom complex haskell 273 | -- type. 274 | instance (FromNamedRecord a, ToNamedRecord a, CSV s (MapRow ByteString)) => 275 | CSV s (Named a) where 276 | rowToStr s a = rowToStr s . toNamedRecord . getNamed $ a 277 | intoCSV set = intoCSV set .| C.mapMaybe go 278 | where 279 | go x = either (const Nothing) (Just . Named) $ 280 | runParser (parseNamedRecord x) 281 | 282 | fromCSV set = C.map go .| fromCSV set 283 | where 284 | go = toNamedRecord . getNamed 285 | 286 | instance (FromNamedRecordOrdered a, ToNamedRecordOrdered a, CSV s (OrderedMapRow ByteString)) => 287 | CSV s (NamedOrdered a) where 288 | rowToStr s a = rowToStr s . toNamedRecordOrdered . getNamedOrdered $ a 289 | intoCSV set = intoCSV set .| C.mapMaybe go 290 | where 291 | go x = either (const Nothing) (Just . NamedOrdered) $ 292 | runParser (parseNamedRecordOrdered x) 293 | 294 | fromCSV set = C.map go .| fromCSV set 295 | where 296 | go = toNamedRecordOrdered . 
getNamedOrdered 297 | 298 | 299 | ------------------------------------------------------------------------------- 300 | fromCSVMap :: (Monad m, IsString s, CSV s [a]) 301 | => CSVSettings -> ConduitT (M.Map k a) s m () 302 | fromCSVMap set = awaitForever push 303 | where 304 | push r = mapM_ yield [rowToStr set (M.elems r), "\n"] 305 | 306 | fromCSVMapOrdered :: (Monad m, IsString s, CSV s [a]) 307 | => CSVSettings -> ConduitM (MO.OMap k a) s m () 308 | fromCSVMapOrdered set = awaitForever push 309 | where 310 | push r = mapM_ yield [rowToStr set (map snd $ MO.assocs r), "\n"] 311 | 312 | 313 | ------------------------------------------------------------------------------- 314 | -- | Write headers AND the row into the output stream, once. If you 315 | -- don't call this while using 'MapRow' family of row types, then your 316 | -- resulting output will NOT have any headers in it. 317 | -- 318 | -- Usage: Just chain this using the 'Monad' instance in your pipeline: 319 | -- 320 | -- > runConduit $ ... .| writeHeaders settings >> fromCSV settings .| sinkFile "..." 
321 | writeHeaders 322 | :: (Monad m, CSV s (Row r), IsString s) 323 | => CSVSettings 324 | -> ConduitT (MapRow r) s m () 325 | writeHeaders set = do 326 | mrow <- await 327 | case mrow of 328 | Nothing -> return () 329 | Just row -> mapM_ yield [ rowToStr set (M.keys row) 330 | , "\n" 331 | , rowToStr set (M.elems row) 332 | , "\n" ] 333 | 334 | writeHeadersOrdered 335 | :: (Monad m, CSV s (Row r), IsString s) 336 | => CSVSettings 337 | -> ConduitM (OrderedMapRow r) s m () 338 | writeHeadersOrdered set = do 339 | mrow <- await 340 | case mrow of 341 | Nothing -> return () 342 | Just row -> mapM_ yield [ rowToStr set (map fst $ MO.assocs row) 343 | , "\n" 344 | , rowToStr set (map snd $ MO.assocs row) 345 | , "\n" ] 346 | 347 | 348 | --------------------------- 349 | -- Convenience Functions -- 350 | --------------------------- 351 | 352 | 353 | ------------------------------------------------------------------------------- 354 | -- | Read the entire contents of a CSV file into memory. 355 | readCSVFile 356 | :: (MonadIO m, CSV ByteString a) 357 | => CSVSettings -- ^ Settings to use in deciphering stream 358 | -> FilePath -- ^ Input file 359 | -> m (V.Vector a) 360 | readCSVFile set fp = liftIO . runResourceT $ runConduit $ sourceFile fp .| intoCSV set .| transPipe lift (sinkVector growthFactor) 361 | where 362 | growthFactor = 10 363 | 364 | 365 | ------------------------------------------------------------------------------- 366 | -- | A simple way to decode a CSV string. Don't be alarmed by the 367 | -- polymorphic nature of the signature. 's' is the type for the string 368 | -- and 'v' is a kind of 'Vector' here. 369 | -- 370 | -- For example for 'ByteString': 371 | -- 372 | -- >>> s <- LB.readFile "my.csv" 373 | -- >>> decodeCSV defCSVSettings s :: Either SomeException (Vector (Vector ByteString)) 374 | -- 375 | -- will work as long as the data is comma separated. 376 | decodeCSV 377 | :: forall v a s. 
(GV.Vector v a, CSV s a) 378 | => CSVSettings 379 | -> s 380 | -> Either SomeException (v a) 381 | decodeCSV set bs = runST $ runExceptT pipeline 382 | where 383 | src :: ConduitT () s (ExceptT SomeException (ST s1)) () 384 | src = C.sourceList [bs] 385 | csvConvert :: ConduitT s a (ExceptT SomeException (ST s1)) () 386 | csvConvert = transPipe (ExceptT . runCatchT) csvConvert' 387 | csvConvert' :: ConduitT s a (CatchT (ST s1)) () 388 | csvConvert' = intoCSV set 389 | growthFactor = 10 390 | sink :: ConduitT a Void.Void (ExceptT SomeException (ST s1)) (v a) 391 | sink = sinkVector growthFactor 392 | pipeline :: ExceptT SomeException (ST s1) (v a) 393 | pipeline = runConduit (src .| csvConvert .| sink) 394 | 395 | 396 | 397 | ------------------------------------------------------------------------------- 398 | -- | Write CSV data into file. As we use a 'ByteString' sink, you'll 399 | -- need to get your data into a 'ByteString' stream type. 400 | writeCSVFile 401 | :: (CSV ByteString a) 402 | => CSVSettings 403 | -- ^ CSV Settings 404 | -> FilePath 405 | -- ^ Target file 406 | -> IOMode 407 | -- ^ Write vs. append mode 408 | -> [a] 409 | -- ^ List of rows 410 | -> IO () 411 | writeCSVFile set fo fmode rows = runResourceT $ runConduit $ do 412 | C.sourceList rows .| fromCSV set .| 413 | sinkIOHandle (openFile fo fmode) 414 | 415 | 416 | ------------------------------------------------------------------------------- 417 | -- | Map over the rows of a CSV file. Provided for convenience for 418 | -- historical reasons. 419 | -- 420 | -- An easy way to run this function would be 'runResourceT' after 421 | -- feeding it all the arguments. 
422 | mapCSVFile 423 | :: ( MonadResource m 424 | , CSV ByteString a 425 | , CSV ByteString b 426 | # if MIN_VERSION_resourcet(1,2,0) 427 | , MonadThrow m 428 | #endif 429 | ) 430 | => CSVSettings 431 | -- ^ Settings to use both for both input and output 432 | -> (a -> [b]) 433 | -- ^ A mapping function 434 | -> FilePath 435 | -- ^ Input file 436 | -> FilePath 437 | -- ^ Output file 438 | -> m () 439 | mapCSVFile set f fi fo = 440 | transformCSV set (sourceFile fi) (C.concatMap f) (sinkFile fo) 441 | 442 | 443 | ------------------------------------------------------------------------------- 444 | -- | Like transformCSV' but uses the same settings for both input and 445 | -- output. 446 | transformCSV 447 | :: (MonadThrow m, CSV s a, CSV s' b) 448 | => CSVSettings 449 | -- ^ Settings to be used for both input and output 450 | -> ConduitT () s m () 451 | -- ^ A raw stream data source. Ex: 'sourceFile inFile' 452 | -> ConduitT a b m () 453 | -- ^ A transforming conduit 454 | -> ConduitT s' Void.Void m () 455 | -- ^ A raw stream data sink. Ex: 'sinkFile outFile' 456 | -> m () 457 | transformCSV set = transformCSV' set set 458 | 459 | 460 | ------------------------------------------------------------------------------- 461 | -- | General purpose CSV transformer. Apply a list-like processing 462 | -- function from 'Data.Conduit.List' to the rows of a CSV stream. You 463 | -- need to provide a stream data source, a transformer and a stream 464 | -- data sink. 465 | -- 466 | -- An easy way to run this function would be 'runResourceT' after 467 | -- feeding it all the arguments. 
468 | -- 469 | -- Example - map a function over the rows of a CSV file: 470 | -- 471 | -- > transformCSV setIn setOut (sourceFile inFile) (C.map f) (sinkFile outFile) 472 | transformCSV' 473 | :: (MonadThrow m, CSV s a, CSV s' b) 474 | => CSVSettings 475 | -- ^ Settings to be used for input 476 | -> CSVSettings 477 | -- ^ Settings to be used for output 478 | -> ConduitT () s m () 479 | -- ^ A raw stream data source. Ex: 'sourceFile inFile' 480 | -> ConduitT a b m () 481 | -- ^ A transforming conduit 482 | -> ConduitT s' Void.Void m () 483 | -- ^ A raw stream data sink. Ex: 'sinkFile outFile' 484 | -> m () 485 | transformCSV' setIn setOut source c sink = runConduit $ 486 | source .| 487 | intoCSV setIn .| 488 | c .| 489 | fromCSV setOut .| 490 | sink 491 | 492 | 493 | 494 | 495 | ------------------ 496 | -- Vector Utils -- 497 | ------------------ 498 | 499 | 500 | 501 | ------------------------------------------------------------------------------- 502 | -- | An efficient sink that incrementally grows a vector from the input stream 503 | sinkVector :: (PrimMonad m, GV.Vector v a) => Int -> ConduitT a o m (v a) 504 | sinkVector by = do 505 | v <- lift $ GMV.new by 506 | go 0 v 507 | where 508 | -- i is the index of the next element to be written by go 509 | -- also exactly the number of elements in v so far 510 | go i v = do 511 | res <- await 512 | case res of 513 | Nothing -> do 514 | v' <- lift $ GV.freeze $ GMV.slice 0 i v 515 | return $! 
v' 516 | Just x -> do 517 | v' <- case GMV.length v == i of 518 | True -> lift $ GMV.grow v by 519 | False -> return v 520 | lift $ GMV.write v' i x 521 | go (i+1) v' 522 | -------------------------------------------------------------------------------- /src/Data/CSV/Conduit/Conversion.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, CPP, FlexibleInstances, OverloadedStrings, 2 | Rank2Types #-} 3 | #ifdef GENERICS 4 | {-# LANGUAGE DefaultSignatures, TypeOperators, KindSignatures, FlexibleContexts, 5 | MultiParamTypeClasses, UndecidableInstances, ScopedTypeVariables, 6 | DataKinds #-} 7 | #endif 8 | 9 | ----------------------------------------------------------------------------- 10 | -- | 11 | -- Module : Data.CSV.Conduit.Conversion 12 | -- Copyright : Ozgun Ataman, Johan Tibell 13 | -- License : BSD3 14 | -- 15 | -- Maintainer : Ozgun Ataman 16 | -- Stability : experimental 17 | -- 18 | -- This module has been shamelessly taken from Johan Tibell's nicely 19 | -- put together cassava package, which itself borrows the approach 20 | -- from Bryan O'Sullivan's widely used aeson package. 21 | -- 22 | -- We make the necessary adjustments and some simplifications here to 23 | -- bolt this parsing interface onto our underlying "CSV" typeclass. 24 | ---------------------------------------------------------------------------- 25 | 26 | module Data.CSV.Conduit.Conversion 27 | ( 28 | -- * Type conversion 29 | Only(..) 30 | , Named (..) 31 | , NamedOrdered (..) 32 | , Record 33 | , NamedRecord 34 | , NamedRecordOrdered 35 | , FromRecord(..) 36 | , FromNamedRecord(..) 37 | , FromNamedRecordOrdered(..) 38 | , ToNamedRecord(..) 39 | , ToNamedRecordOrdered(..) 40 | , FromField(..) 41 | , ToRecord(..) 42 | , ToField(..) 43 | , Field 44 | 45 | -- * Parser 46 | , Parser 47 | , runParser 48 | 49 | -- * Accessors 50 | , index 51 | , (.!) 
52 | , unsafeIndex 53 | , lookup 54 | , lookupOrdered 55 | , (.:) 56 | , namedField 57 | , (.=) 58 | , record 59 | , namedRecord 60 | , namedRecordOrdered 61 | ) where 62 | 63 | import Control.Applicative as A 64 | import Control.Monad (MonadPlus, mplus, mzero) 65 | import Data.Attoparsec.ByteString.Char8 (double, parseOnly) 66 | import qualified Data.Attoparsec.ByteString.Char8 as A8 67 | import qualified Data.ByteString as B 68 | import qualified Data.ByteString.Char8 as B8 69 | import qualified Data.ByteString.Lazy as L 70 | import Data.Int (Int8, Int16, Int32, Int64) 71 | import qualified Data.Map as M 72 | import qualified Data.Map.Ordered as MO 73 | import Data.Semigroup as Semigroup 74 | import qualified Data.Text as T 75 | import qualified Data.Text.Encoding as T 76 | import qualified Data.Text.Lazy as LT 77 | import qualified Data.Text.Lazy.Encoding as LT 78 | import Data.Traversable as DT 79 | import Data.Vector (Vector, (!)) 80 | import qualified Data.Vector as V 81 | import qualified Data.Vector.Unboxed as U 82 | import Data.Word as W 83 | import GHC.Float (double2Float) 84 | import Prelude hiding (lookup, takeWhile) 85 | 86 | #ifdef GENERICS 87 | import GHC.Generics 88 | import qualified Data.IntMap as IM 89 | #endif 90 | 91 | import Data.CSV.Conduit.Conversion.Internal 92 | 93 | 94 | ------------------------------------------------------------------------ 95 | -- bytestring compatibility 96 | 97 | toStrict :: L.ByteString -> B.ByteString 98 | fromStrict :: B.ByteString -> L.ByteString 99 | #if MIN_VERSION_bytestring(0,10,0) 100 | toStrict = L.toStrict 101 | fromStrict = L.fromStrict 102 | #else 103 | toStrict = B.concat . L.toChunks 104 | fromStrict = L.fromChunks . 
(:[]) 105 | #endif 106 | {-# INLINE toStrict #-} 107 | {-# INLINE fromStrict #-} 108 | 109 | ------------------------------------------------------------------------ 110 | -- Type conversion 111 | 112 | 113 | 114 | -- | A shorthand for the ByteString case of 'MapRow' 115 | type NamedRecord = M.Map B8.ByteString B8.ByteString 116 | type NamedRecordOrdered = MO.OMap B8.ByteString B8.ByteString 117 | 118 | 119 | -- | A wrapper around custom haskell types that can directly be 120 | -- converted/parsed from an incoming CSV stream. 121 | -- 122 | -- We define this wrapper to stop GHC from complaining 123 | -- about overlapping instances. Just use 'getNamed' to get your 124 | -- object out of the wrapper. 125 | newtype Named a = Named { getNamed :: a } deriving (Eq,Show,Read,Ord) 126 | newtype NamedOrdered a = NamedOrdered { getNamedOrdered :: a } deriving (Eq,Show,Read,Ord) 127 | 128 | -- | A record corresponds to a single line in a CSV file. 129 | type Record = Vector B8.ByteString 130 | 131 | -- | A single field within a record. 132 | type Field = B8.ByteString 133 | 134 | 135 | ------------------------------------------------------------------------ 136 | -- Index-based conversion 137 | 138 | -- | A type that can be converted from a single CSV record, with the 139 | -- possibility of failure. 140 | -- 141 | -- When writing an instance, use 'empty', 'mzero', or 'fail' to make a 142 | -- conversion fail, e.g. if a 'Record' has the wrong number of 143 | -- columns. 144 | -- 145 | -- Given this example data: 146 | -- 147 | -- > John,56 148 | -- > Jane,55 149 | -- 150 | -- here's an example type and instance: 151 | -- 152 | -- > data Person = Person { name :: !Text, age :: !Int } 153 | -- > 154 | -- > instance FromRecord Person where 155 | -- > parseRecord v 156 | -- > | length v == 2 = Person <$> 157 | -- > v .! 0 <*> 158 | -- > v .! 
1 159 | -- > | otherwise = mzero 160 | class FromRecord a where 161 | parseRecord :: Record -> Parser a 162 | 163 | #ifdef GENERICS 164 | default parseRecord :: (Generic a, GFromRecord (Rep a)) => Record -> Parser a 165 | parseRecord r = to A.<$> gparseRecord r 166 | #endif 167 | 168 | -- | Haskell lacks a single-element tuple type, so if you CSV data 169 | -- with just one column you can use the 'Only' type to represent a 170 | -- single-column result. 171 | newtype Only a = Only { 172 | fromOnly :: a 173 | } deriving (Eq, Ord, Read, Show) 174 | 175 | -- | A type that can be converted to a single CSV record. 176 | -- 177 | -- An example type and instance: 178 | -- 179 | -- > data Person = Person { name :: !Text, age :: !Int } 180 | -- > 181 | -- > instance ToRecord Person where 182 | -- > toRecord (Person name age) = record [ 183 | -- > toField name, toField age] 184 | -- 185 | -- Outputs data on this form: 186 | -- 187 | -- > John,56 188 | -- > Jane,55 189 | class ToRecord a where 190 | toRecord :: a -> Record 191 | 192 | #ifdef GENERICS 193 | default toRecord :: (Generic a, GToRecord (Rep a) Field) => a -> Record 194 | toRecord = V.fromList . gtoRecord . from 195 | #endif 196 | 197 | instance FromField a => FromRecord (Only a) where 198 | parseRecord v 199 | | n == 1 = Only <$> unsafeIndex v 0 200 | | otherwise = lengthMismatch 1 v 201 | where 202 | n = V.length v 203 | 204 | -- TODO: Check if we want all toRecord conversions to be stricter. 205 | 206 | instance ToField a => ToRecord (Only a) where 207 | toRecord = V.singleton . toField . 
fromOnly 208 | 209 | instance (FromField a, FromField b) => FromRecord (a, b) where 210 | parseRecord v 211 | | n == 2 = (,) <$> unsafeIndex v 0 212 | <*> unsafeIndex v 1 213 | | otherwise = lengthMismatch 2 v 214 | where 215 | n = V.length v 216 | 217 | instance (ToField a, ToField b) => ToRecord (a, b) where 218 | toRecord (a, b) = V.fromList [toField a, toField b] 219 | 220 | instance (FromField a, FromField b, FromField c) => FromRecord (a, b, c) where 221 | parseRecord v 222 | | n == 3 = (,,) <$> unsafeIndex v 0 223 | <*> unsafeIndex v 1 224 | <*> unsafeIndex v 2 225 | | otherwise = lengthMismatch 3 v 226 | where 227 | n = V.length v 228 | 229 | instance (ToField a, ToField b, ToField c) => 230 | ToRecord (a, b, c) where 231 | toRecord (a, b, c) = V.fromList [toField a, toField b, toField c] 232 | 233 | instance (FromField a, FromField b, FromField c, FromField d) => 234 | FromRecord (a, b, c, d) where 235 | parseRecord v 236 | | n == 4 = (,,,) <$> unsafeIndex v 0 237 | <*> unsafeIndex v 1 238 | <*> unsafeIndex v 2 239 | <*> unsafeIndex v 3 240 | | otherwise = lengthMismatch 4 v 241 | where 242 | n = V.length v 243 | 244 | instance (ToField a, ToField b, ToField c, ToField d) => 245 | ToRecord (a, b, c, d) where 246 | toRecord (a, b, c, d) = V.fromList [ 247 | toField a, toField b, toField c, toField d] 248 | 249 | instance (FromField a, FromField b, FromField c, FromField d, FromField e) => 250 | FromRecord (a, b, c, d, e) where 251 | parseRecord v 252 | | n == 5 = (,,,,) <$> unsafeIndex v 0 253 | <*> unsafeIndex v 1 254 | <*> unsafeIndex v 2 255 | <*> unsafeIndex v 3 256 | <*> unsafeIndex v 4 257 | | otherwise = lengthMismatch 5 v 258 | where 259 | n = V.length v 260 | 261 | instance (ToField a, ToField b, ToField c, ToField d, ToField e) => 262 | ToRecord (a, b, c, d, e) where 263 | toRecord (a, b, c, d, e) = V.fromList [ 264 | toField a, toField b, toField c, toField d, toField e] 265 | 266 | instance (FromField a, FromField b, FromField c, FromField d, 
FromField e, 267 | FromField f) => 268 | FromRecord (a, b, c, d, e, f) where 269 | parseRecord v 270 | | n == 6 = (,,,,,) <$> unsafeIndex v 0 271 | <*> unsafeIndex v 1 272 | <*> unsafeIndex v 2 273 | <*> unsafeIndex v 3 274 | <*> unsafeIndex v 4 275 | <*> unsafeIndex v 5 276 | | otherwise = lengthMismatch 6 v 277 | where 278 | n = V.length v 279 | 280 | instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f) => 281 | ToRecord (a, b, c, d, e, f) where 282 | toRecord (a, b, c, d, e, f) = V.fromList [ 283 | toField a, toField b, toField c, toField d, toField e, toField f] 284 | 285 | instance (FromField a, FromField b, FromField c, FromField d, FromField e, 286 | FromField f, FromField g) => 287 | FromRecord (a, b, c, d, e, f, g) where 288 | parseRecord v 289 | | n == 7 = (,,,,,,) <$> unsafeIndex v 0 290 | <*> unsafeIndex v 1 291 | <*> unsafeIndex v 2 292 | <*> unsafeIndex v 3 293 | <*> unsafeIndex v 4 294 | <*> unsafeIndex v 5 295 | <*> unsafeIndex v 6 296 | | otherwise = lengthMismatch 7 v 297 | where 298 | n = V.length v 299 | 300 | instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f, 301 | ToField g) => 302 | ToRecord (a, b, c, d, e, f, g) where 303 | toRecord (a, b, c, d, e, f, g) = V.fromList [ 304 | toField a, toField b, toField c, toField d, toField e, toField f, 305 | toField g] 306 | 307 | lengthMismatch :: Int -> Record -> Parser a 308 | lengthMismatch expected v = 309 | fail $ "cannot unpack array of length " ++ 310 | show n ++ " into a " ++ desired ++ ". Input record: " ++ 311 | show v 312 | where 313 | n = V.length v 314 | desired | expected == 1 = "Only" 315 | | expected == 2 = "pair" 316 | | otherwise = show expected ++ "-tuple" 317 | 318 | instance FromField a => FromRecord [a] where 319 | parseRecord = DT.traverse parseField . V.toList 320 | 321 | instance ToField a => ToRecord [a] where 322 | toRecord = V.fromList . 
map toField 323 | 324 | instance FromField a => FromRecord (V.Vector a) where 325 | parseRecord = traverse parseField 326 | 327 | instance ToField a => ToRecord (Vector a) where 328 | toRecord = V.map toField 329 | 330 | instance (FromField a, U.Unbox a) => FromRecord (U.Vector a) where 331 | parseRecord = fmap U.convert . traverse parseField 332 | 333 | instance (ToField a, U.Unbox a) => ToRecord (U.Vector a) where 334 | toRecord = V.map toField . U.convert 335 | 336 | ------------------------------------------------------------------------ 337 | -- Name-based conversion 338 | 339 | -- | A type that can be converted from a single CSV record, with the 340 | -- possibility of failure. 341 | -- 342 | -- When writing an instance, use 'empty', 'mzero', or 'fail' to make a 343 | -- conversion fail, e.g. if a 'Record' has the wrong number of 344 | -- columns. 345 | -- 346 | -- Given this example data: 347 | -- 348 | -- > name,age 349 | -- > John,56 350 | -- > Jane,55 351 | -- 352 | -- here's an example type and instance: 353 | -- 354 | -- > {-# LANGUAGE OverloadedStrings #-} 355 | -- > 356 | -- > data Person = Person { name :: !Text, age :: !Int } 357 | -- > 358 | -- > instance FromNamedRecord Person where 359 | -- > parseNamedRecord m = Person <$> 360 | -- > m .: "name" <*> 361 | -- > m .: "age" 362 | -- 363 | -- Note the use of the @OverloadedStrings@ language extension which 364 | -- enables 'B8.ByteString' values to be written as string literals. 365 | class FromNamedRecord a where 366 | parseNamedRecord :: NamedRecord -> Parser a 367 | 368 | #ifdef GENERICS 369 | default parseNamedRecord :: (Generic a, GFromNamedRecord (Rep a)) => NamedRecord -> Parser a 370 | parseNamedRecord r = to <$> gparseNamedRecord r 371 | #endif 372 | 373 | class FromNamedRecordOrdered a where 374 | parseNamedRecordOrdered :: NamedRecordOrdered -> Parser a 375 | 376 | -- | A type that can be converted to a single CSV record. 
377 | -- 378 | -- An example type and instance: 379 | -- 380 | -- > data Person = Person { name :: !Text, age :: !Int } 381 | -- > 382 | -- > instance ToNamedRecord Person where 383 | -- > toNamedRecord (Person name age) = namedRecord [ 384 | -- > "name" .= name, "age" .= age] 385 | class ToNamedRecord a where 386 | toNamedRecord :: a -> NamedRecord 387 | 388 | #ifdef GENERICS 389 | default toNamedRecord :: (Generic a, GToRecord (Rep a) (B.ByteString, B.ByteString)) => a -> NamedRecord 390 | toNamedRecord = namedRecord . gtoRecord . from 391 | #endif 392 | 393 | class ToNamedRecordOrdered a where 394 | toNamedRecordOrdered :: a -> NamedRecordOrdered 395 | 396 | instance FromField a => FromNamedRecord (M.Map B.ByteString a) where 397 | parseNamedRecord m = traverse parseField m 398 | 399 | instance FromField a => FromNamedRecordOrdered (MO.OMap B.ByteString a) where 400 | parseNamedRecordOrdered m = traverse parseField m 401 | 402 | instance ToField a => ToNamedRecord (M.Map B.ByteString a) where 403 | toNamedRecord = M.map toField 404 | 405 | instance ToField a => ToNamedRecordOrdered (MO.OMap B.ByteString a) where 406 | toNamedRecordOrdered a = MO.fromList $ map (fmap toField) $ MO.assocs a 407 | 408 | -- instance FromField a => FromNamedRecord (HM.HashMap B.ByteString a) where 409 | -- parseNamedRecord m = traverse (\ s -> parseField s) m 410 | 411 | -- instance ToField a => ToNamedRecord (HM.HashMap B.ByteString a) where 412 | -- toNamedRecord = HM.map toField 413 | 414 | ------------------------------------------------------------------------ 415 | -- Individual field conversion 416 | 417 | -- | A type that can be converted from a single CSV field, with the 418 | -- possibility of failure. 419 | -- 420 | -- When writing an instance, use 'empty', 'mzero', or 'fail' to make a 421 | -- conversion fail, e.g. if a 'Field' can't be converted to the given 422 | -- type. 
423 | -- 424 | -- Example type and instance: 425 | -- 426 | -- > {-# LANGUAGE OverloadedStrings #-} 427 | -- > 428 | -- > data Color = Red | Green | Blue 429 | -- > 430 | -- > instance FromField Color where 431 | -- > parseField s 432 | -- > | s == "R" = pure Red 433 | -- > | s == "G" = pure Green 434 | -- > | s == "B" = pure Blue 435 | -- > | otherwise = mzero 436 | class FromField a where 437 | parseField :: Field -> Parser a 438 | 439 | -- | A type that can be converted to a single CSV field. 440 | -- 441 | -- Example type and instance: 442 | -- 443 | -- > {-# LANGUAGE OverloadedStrings #-} 444 | -- > 445 | -- > data Color = Red | Green | Blue 446 | -- > 447 | -- > instance ToField Color where 448 | -- > toField Red = "R" 449 | -- > toField Green = "G" 450 | -- > toField Blue = "B" 451 | class ToField a where 452 | toField :: a -> Field 453 | 454 | -- | 'Nothing' if the 'Field' is 'B.empty', 'Just' otherwise. 455 | instance FromField a => FromField (Maybe a) where 456 | parseField s 457 | | B.null s = pure Nothing 458 | | otherwise = Just <$> parseField s 459 | {-# INLINE parseField #-} 460 | 461 | -- | 'Nothing' is encoded as an 'B.empty' field. 462 | instance ToField a => ToField (Maybe a) where 463 | toField = maybe B.empty toField 464 | {-# INLINE toField #-} 465 | 466 | -- | Ignores the 'Field'. Always succeeds. 467 | instance FromField () where 468 | parseField _ = pure () 469 | {-# INLINE parseField #-} 470 | 471 | -- | Assumes UTF-8 encoding. 472 | instance FromField Char where 473 | parseField s = 474 | case T.decodeUtf8' s of 475 | Left e -> fail $ show e 476 | Right t 477 | | T.compareLength t 1 == EQ -> pure (T.head t) 478 | | otherwise -> typeError "Char" s Nothing 479 | {-# INLINE parseField #-} 480 | 481 | -- | Uses UTF-8 encoding. 482 | instance ToField Char where 483 | toField = toField . T.encodeUtf8 . T.singleton 484 | {-# INLINE toField #-} 485 | 486 | -- | Accepts same syntax as 'rational'. 
487 | instance FromField Double where 488 | parseField = parseDouble 489 | {-# INLINE parseField #-} 490 | 491 | -- | Uses decimal notation or scientific notation, depending on the 492 | -- number. 493 | instance ToField Double where 494 | toField = realFloat 495 | {-# INLINE toField #-} 496 | 497 | -- | Accepts same syntax as 'rational'. 498 | instance FromField Float where 499 | parseField s = double2Float <$> parseDouble s 500 | {-# INLINE parseField #-} 501 | 502 | -- | Uses decimal notation or scientific notation, depending on the 503 | -- number. 504 | instance ToField Float where 505 | toField = realFloat 506 | {-# INLINE toField #-} 507 | 508 | parseDouble :: B.ByteString -> Parser Double 509 | parseDouble s = case parseOnly double s of 510 | Left err -> typeError "Double" s (Just err) 511 | Right n -> pure n 512 | {-# INLINE parseDouble #-} 513 | 514 | -- | Accepts a signed decimal number. 515 | instance FromField Int where 516 | parseField = parseSigned "Int" 517 | {-# INLINE parseField #-} 518 | 519 | -- | Uses decimal encoding with optional sign. 520 | instance ToField Int where 521 | toField = decimal 522 | {-# INLINE toField #-} 523 | 524 | -- | Accepts a signed decimal number. 525 | instance FromField Integer where 526 | parseField = parseSigned "Integer" 527 | {-# INLINE parseField #-} 528 | 529 | -- | Uses decimal encoding with optional sign. 530 | instance ToField Integer where 531 | toField = decimal 532 | {-# INLINE toField #-} 533 | 534 | -- | Accepts a signed decimal number. 535 | instance FromField Int8 where 536 | parseField = parseSigned "Int8" 537 | {-# INLINE parseField #-} 538 | 539 | -- | Uses decimal encoding with optional sign. 540 | instance ToField Int8 where 541 | toField = decimal 542 | {-# INLINE toField #-} 543 | 544 | -- | Accepts a signed decimal number. 545 | instance FromField Int16 where 546 | parseField = parseSigned "Int16" 547 | {-# INLINE parseField #-} 548 | 549 | -- | Uses decimal encoding with optional sign. 
550 | instance ToField Int16 where 551 | toField = decimal 552 | {-# INLINE toField #-} 553 | 554 | -- | Accepts a signed decimal number. 555 | instance FromField Int32 where 556 | parseField = parseSigned "Int32" 557 | {-# INLINE parseField #-} 558 | 559 | -- | Uses decimal encoding with optional sign. 560 | instance ToField Int32 where 561 | toField = decimal 562 | {-# INLINE toField #-} 563 | 564 | -- | Accepts a signed decimal number. 565 | instance FromField Int64 where 566 | parseField = parseSigned "Int64" 567 | {-# INLINE parseField #-} 568 | 569 | -- | Uses decimal encoding with optional sign. 570 | instance ToField Int64 where 571 | toField = decimal 572 | {-# INLINE toField #-} 573 | 574 | -- | Accepts an unsigned decimal number. 575 | instance FromField W.Word where 576 | parseField = parseUnsigned "Word" 577 | {-# INLINE parseField #-} 578 | 579 | -- | Uses decimal encoding. 580 | instance ToField Word where 581 | toField = decimal 582 | {-# INLINE toField #-} 583 | 584 | -- | Accepts an unsigned decimal number. 585 | instance FromField Word8 where 586 | parseField = parseUnsigned "Word8" 587 | {-# INLINE parseField #-} 588 | 589 | -- | Uses decimal encoding. 590 | instance ToField Word8 where 591 | toField = decimal 592 | {-# INLINE toField #-} 593 | 594 | -- | Accepts an unsigned decimal number. 595 | instance FromField Word16 where 596 | parseField = parseUnsigned "Word16" 597 | {-# INLINE parseField #-} 598 | 599 | -- | Uses decimal encoding. 600 | instance ToField Word16 where 601 | toField = decimal 602 | {-# INLINE toField #-} 603 | 604 | -- | Accepts an unsigned decimal number. 605 | instance FromField Word32 where 606 | parseField = parseUnsigned "Word32" 607 | {-# INLINE parseField #-} 608 | 609 | -- | Uses decimal encoding. 610 | instance ToField Word32 where 611 | toField = decimal 612 | {-# INLINE toField #-} 613 | 614 | -- | Accepts an unsigned decimal number. 
615 | instance FromField Word64 where 616 | parseField = parseUnsigned "Word64" 617 | {-# INLINE parseField #-} 618 | 619 | -- | Uses decimal encoding. 620 | instance ToField Word64 where 621 | toField = decimal 622 | {-# INLINE toField #-} 623 | 624 | instance FromField B.ByteString where 625 | parseField = pure 626 | {-# INLINE parseField #-} 627 | 628 | instance ToField B.ByteString where 629 | toField = id 630 | {-# INLINE toField #-} 631 | 632 | instance FromField L.ByteString where 633 | parseField = pure . fromStrict 634 | {-# INLINE parseField #-} 635 | 636 | instance ToField L.ByteString where 637 | toField = toStrict 638 | {-# INLINE toField #-} 639 | 640 | -- | Assumes UTF-8 encoding. Fails on invalid byte sequences. 641 | instance FromField T.Text where 642 | parseField = either (fail . show) pure . T.decodeUtf8' 643 | {-# INLINE parseField #-} 644 | 645 | -- | Uses UTF-8 encoding. 646 | instance ToField T.Text where 647 | toField = toField . T.encodeUtf8 648 | {-# INLINE toField #-} 649 | 650 | -- | Assumes UTF-8 encoding. Fails on invalid byte sequences. 651 | instance FromField LT.Text where 652 | parseField = either (fail . show) (pure . LT.fromStrict) . T.decodeUtf8' 653 | {-# INLINE parseField #-} 654 | 655 | -- | Uses UTF-8 encoding. 656 | instance ToField LT.Text where 657 | toField = toField . toStrict . LT.encodeUtf8 658 | {-# INLINE toField #-} 659 | 660 | -- | Assumes UTF-8 encoding. Fails on invalid byte sequences. 661 | instance FromField [Char] where 662 | parseField = fmap T.unpack . parseField 663 | {-# INLINE parseField #-} 664 | 665 | -- | Uses UTF-8 encoding. 666 | instance ToField [Char] where 667 | toField = toField . 
T.pack 668 | {-# INLINE toField #-} 669 | 670 | parseSigned :: (Integral a) => String -> B.ByteString -> Parser a 671 | parseSigned typ s = case parseOnly (A8.signed A8.decimal) s of 672 | Left err -> typeError typ s (Just err) 673 | Right n -> pure n 674 | {-# INLINE parseSigned #-} 675 | 676 | parseUnsigned :: Integral a => String -> B.ByteString -> Parser a 677 | parseUnsigned typ s = case parseOnly A8.decimal s of 678 | Left err -> typeError typ s (Just err) 679 | Right n -> pure n 680 | {-# INLINE parseUnsigned #-} 681 | 682 | typeError :: String -> B.ByteString -> Maybe String -> Parser a 683 | typeError typ s mmsg = 684 | fail $ "expected " ++ typ ++ ", got " ++ show (B8.unpack s) ++ cause 685 | where 686 | cause = case mmsg of 687 | Just msg -> " (" ++ msg ++ ")" 688 | Nothing -> "" 689 | 690 | ------------------------------------------------------------------------ 691 | -- Constructors and accessors 692 | 693 | -- | Retrieve the /n/th field in the given record. The result is 694 | -- 'empty' if the value cannot be converted to the desired type. 695 | -- Raises an exception if the index is out of bounds. 696 | -- 697 | -- 'index' is a simple convenience function that is equivalent to 698 | -- @'parseField' (v '!' idx)@. If you're certain that the index is not 699 | -- out of bounds, using 'unsafeIndex' is somewhat faster. 700 | index :: FromField a => Record -> Int -> Parser a 701 | index v idx = parseField (v ! idx) 702 | {-# INLINE index #-} 703 | 704 | -- | Alias for 'index'. 705 | (.!) :: FromField a => Record -> Int -> Parser a 706 | (.!) = index 707 | {-# INLINE (.!) #-} 708 | infixl 9 .! 709 | 710 | -- | Like 'index' but without bounds checking. 711 | unsafeIndex :: FromField a => Record -> Int -> Parser a 712 | unsafeIndex v idx = parseField (V.unsafeIndex v idx) 713 | {-# INLINE unsafeIndex #-} 714 | 715 | -- | Retrieve a field in the given record by name. 
The result is 716 | -- 'empty' if the field is missing or if the value cannot be converted 717 | -- to the desired type. 718 | lookup :: FromField a => NamedRecord -> B.ByteString -> Parser a 719 | lookup m name = maybe (fail err) parseField $ M.lookup name m 720 | where err = "no field named " ++ show (B8.unpack name) 721 | {-# INLINE lookup #-} 722 | 723 | lookupOrdered :: FromField a => NamedRecordOrdered -> B.ByteString -> Parser a 724 | lookupOrdered m name = maybe (fail err) parseField $ MO.lookup name m 725 | where err = "no field named " ++ show (B8.unpack name) 726 | {-# INLINE lookupOrdered #-} 727 | 728 | -- | Alias for 'lookup'. 729 | (.:) :: FromField a => NamedRecord -> B.ByteString -> Parser a 730 | (.:) = lookup 731 | {-# INLINE (.:) #-} 732 | 733 | -- | Construct a pair from a name and a value. For use with 734 | -- 'namedRecord'. 735 | namedField :: ToField a => B.ByteString -> a -> (B.ByteString, B.ByteString) 736 | namedField name val = (name, toField val) 737 | {-# INLINE namedField #-} 738 | 739 | -- | Alias for 'namedField'. 740 | (.=) :: ToField a => B.ByteString -> a -> (B.ByteString, B.ByteString) 741 | (.=) = namedField 742 | {-# INLINE (.=) #-} 743 | 744 | -- | Construct a record from a list of 'B.ByteString's. Use 'toField' 745 | -- to convert values to 'B.ByteString's for use with 'record'. 746 | record :: [B.ByteString] -> Record 747 | record = V.fromList 748 | 749 | -- | Construct a named record from a list of name-value 'B.ByteString' 750 | -- pairs. Use '.=' to construct such a pair from a name and a value. 751 | namedRecord :: [(B.ByteString, B.ByteString)] -> NamedRecord 752 | namedRecord = M.fromList 753 | 754 | namedRecordOrdered :: [(B.ByteString, B.ByteString)] -> NamedRecordOrdered 755 | namedRecordOrdered = MO.fromList 756 | 757 | ------------------------------------------------------------------------ 758 | -- Parser for converting records to data types 759 | 760 | -- | Failure continuation. 
type Failure f r   = String -> f r
-- | Success continuation.
type Success a f r = a -> f r

-- | Conversion of a field to a value might fail e.g. if the field is
-- malformed.  This possibility is captured by the 'Parser' type, which
-- lets you compose several field conversions together in such a way
-- that if any of them fail, the whole record conversion fails.
--
-- Internally this is a CPS parser: it is given a failure and a
-- success continuation and calls exactly one of them.
newtype Parser a = Parser
    { unParser :: forall f r.
                  Failure f r
               -> Success a f r
               -> f r
    }

instance Monad Parser where
    m >>= g = Parser $ \onFail onSucc ->
        unParser m onFail (\a -> unParser (g a) onFail onSucc)
    {-# INLINE (>>=) #-}

#if MIN_VERSION_base(4,13,0)
instance MonadFail Parser where
#endif

    -- With base >= 4.13 this defines 'MonadFail.fail'; with an older
    -- base the instance head above is CPP'd away and this same
    -- equation belongs to the 'Monad' instance instead.
    fail msg = Parser $ \onFail _onSucc -> onFail msg
    {-# INLINE fail #-}

instance Functor Parser where
    fmap f m = Parser $ \onFail onSucc ->
        unParser m onFail (onSucc . f)
    {-# INLINE fmap #-}

instance Applicative Parser where

    (<*>) = apP
    {-# INLINE (<*>) #-}
    pure a = Parser $ \_onFail onSucc -> onSucc a
    {-# INLINE pure #-}

instance Alternative Parser where
    empty = fail "empty"
    {-# INLINE empty #-}
    (<|>) = mplus
    {-# INLINE (<|>) #-}

instance MonadPlus Parser where
    mzero = fail "mzero"
    {-# INLINE mzero #-}
    -- Try @a@; on failure discard its error and run @b@ instead.
    mplus a b = Parser $ \onFail onSucc ->
        unParser a (\_ -> unParser b onFail onSucc) onSucc
    {-# INLINE mplus #-}

instance Semigroup.Semigroup (Parser a) where
    (<>) = mplus
    {-# INLINE (<>) #-}

-- | '<*>' expressed via the 'Monad' instance so both agree.
apP :: Parser (a -> b) -> Parser a -> Parser b
apP d e = do
    b <- d
    a <- e
    return (b a)
{-# INLINE apP #-}

-- | Run a 'Parser', returning either @'Left' errMsg@ or @'Right'
-- result@. Forces the value in the 'Left' or 'Right' constructors to
-- weak head normal form.
--
-- You most likely won't need to use this function directly, but it's
-- included for completeness.
runParser :: Parser a -> Either String a
runParser p = unParser p left right
  where
    -- Bang patterns force the payload to WHNF before wrapping.
    left !errMsg = Left errMsg
    right !x     = Right x
{-# INLINE runParser #-}

#ifdef GENERICS

-- | Generic derivation of positional-record parsing.
class GFromRecord f where
    gparseRecord :: Record -> Parser (f p)

instance GFromRecordSum f Record => GFromRecord (M1 i n f) where
    gparseRecord v =
        case IM.lookup n gparseRecordSum of
          Nothing -> lengthMismatch n v
          Just p  -> M1 <$> p v
      where
        n = V.length v

-- | Generic derivation of named-record parsing.
class GFromNamedRecord f where
    gparseNamedRecord :: NamedRecord -> Parser (f p)

instance GFromRecordSum f NamedRecord => GFromNamedRecord (M1 i n f) where
    gparseNamedRecord v =
        foldr (\f p -> p <|> M1 <$> f v) empty (IM.elems gparseRecordSum)

-- | Maps each constructor's field count to a parser for it, so the
-- record length selects the constructor.
class GFromRecordSum f r where
    gparseRecordSum :: IM.IntMap (r -> Parser (f p))

instance (GFromRecordSum a r, GFromRecordSum b r) => GFromRecordSum (a :+: b) r where
    gparseRecordSum =
        IM.unionWith (\a b r -> a r <|> b r)
            (fmap (L1 <$>) <$> gparseRecordSum)
            (fmap (R1 <$>) <$> gparseRecordSum)

instance GFromRecordProd f r => GFromRecordSum (M1 i n f) r where
    gparseRecordSum = IM.singleton n (fmap (M1 <$>) f)
      where
        (n, f) = gparseRecordProd 0

-- | Parses a product of fields; threads the next field index through.
class GFromRecordProd f r where
    gparseRecordProd :: Int -> (Int, r -> Parser (f p))

instance GFromRecordProd U1 r where
    gparseRecordProd n = (n, const (pure U1))

instance (GFromRecordProd a r, GFromRecordProd b r) => GFromRecordProd (a :*: b) r where
    gparseRecordProd n0 = (n2, f)
      where
        f r = (:*:) <$> fa r <*> fb r
        (n1, fa) = gparseRecordProd n0
        (n2, fb) = gparseRecordProd n1

instance GFromRecordProd f Record => GFromRecordProd (M1 i n f) Record where
    gparseRecordProd n = fmap (M1 <$>) <$> gparseRecordProd n

instance FromField a => GFromRecordProd (K1 i a) Record where
    gparseRecordProd n = (n + 1, \v -> K1 <$> parseField (V.unsafeIndex v n))

#if MIN_VERSION_base(4,9,0)
data Proxy (s :: Meta) (f :: Type -> Type) a = Proxy
#else
data Proxy s (f :: * -> *) a = Proxy
#endif

instance (FromField a, Selector s) => GFromRecordProd (M1 S s (K1 i a)) NamedRecord where
    gparseRecordProd n = (n + 1, \v -> (M1 . K1) <$> v .: name)
      where
        -- Field name is taken from the record selector's name.
        name = T.encodeUtf8 (T.pack (selName (Proxy :: Proxy s f a)))


-- | Generic conversion of a value to a list of fields (@f@ is a bare
-- 'Field' for positional output or a name\/value pair for named
-- output).
class GToRecord a f where
    gtoRecord :: a p -> [f]

instance GToRecord U1 f where
    gtoRecord U1 = []

instance (GToRecord a f, GToRecord b f) => GToRecord (a :*: b) f where
    gtoRecord (a :*: b) = gtoRecord a ++ gtoRecord b

instance (GToRecord a f, GToRecord b f) => GToRecord (a :+: b) f where
    gtoRecord (L1 a) = gtoRecord a
    gtoRecord (R1 b) = gtoRecord b

instance GToRecord a f => GToRecord (M1 D c a) f where
    gtoRecord (M1 a) = gtoRecord a

instance GToRecord a f => GToRecord (M1 C c a) f where
    gtoRecord (M1 a) = gtoRecord a

instance GToRecord a Field => GToRecord (M1 S c a) Field where
    gtoRecord (M1 a) = gtoRecord a

instance ToField a => GToRecord (K1 i a) Field where
    gtoRecord (K1 a) = [toField a]

instance (ToField a, Selector s) => GToRecord (M1 S s (K1 i a)) (B.ByteString, B.ByteString) where
    gtoRecord m@(M1 (K1 a)) = [T.encodeUtf8 (T.pack (selName m)) .= toField a]

#endif
--------------------------------------------------------------------------------
-- /src/Data/CSV/Conduit/Conversion/Internal.hs:
--------------------------------------------------------------------------------
module Data.CSV.Conduit.Conversion.Internal
    (
decimal 3 | , realFloat 4 | ) where 5 | 6 | import Blaze.ByteString.Builder 7 | import Blaze.ByteString.Builder.Char8 8 | import Data.Array.Base (unsafeAt) 9 | import Data.Array.IArray 10 | import qualified Data.ByteString as B 11 | import Data.Char (ord) 12 | import Data.Int 13 | import Data.Word 14 | 15 | import Data.CSV.Conduit.Monoid as Monoid ((<>)) 16 | 17 | ------------------------------------------------------------------------ 18 | -- Integers 19 | 20 | decimal :: Integral a => a -> B.ByteString 21 | decimal = toByteString . formatDecimal 22 | {-# INLINE decimal #-} 23 | 24 | -- TODO: Add an optimized version for Integer. 25 | 26 | formatDecimal :: Integral a => a -> Builder 27 | {-# RULES "formatDecimal/Int" formatDecimal = formatBoundedSigned 28 | :: Int -> Builder #-} 29 | {-# RULES "formatDecimal/Int8" formatDecimal = formatBoundedSigned 30 | :: Int8 -> Builder #-} 31 | {-# RULES "formatDecimal/Int16" formatDecimal = formatBoundedSigned 32 | :: Int16 -> Builder #-} 33 | {-# RULES "formatDecimal/Int32" formatDecimal = formatBoundedSigned 34 | :: Int32 -> Builder #-} 35 | {-# RULES "formatDecimal/Int64" formatDecimal = formatBoundedSigned 36 | :: Int64 -> Builder #-} 37 | {-# RULES "formatDecimal/Word" formatDecimal = formatPositive 38 | :: Word -> Builder #-} 39 | {-# RULES "formatDecimal/Word8" formatDecimal = formatPositive 40 | :: Word8 -> Builder #-} 41 | {-# RULES "formatDecimal/Word16" formatDecimal = formatPositive 42 | :: Word16 -> Builder #-} 43 | {-# RULES "formatDecimal/Word32" formatDecimal = formatPositive 44 | :: Word32 -> Builder #-} 45 | {-# RULES "formatDecimal/Word64" formatDecimal = formatPositive 46 | :: Word64 -> Builder #-} 47 | {-# NOINLINE formatDecimal #-} 48 | formatDecimal i 49 | | i < 0 = minus Monoid.<> 50 | if i <= -128 51 | then formatPositive (-(i `quot` 10)) <> digit (-(i `rem` 10)) 52 | else formatPositive (-i) 53 | | otherwise = formatPositive i 54 | 55 | formatBoundedSigned :: (Integral a, Bounded a) => a -> Builder 
56 | {-# SPECIALIZE formatBoundedSigned :: Int -> Builder #-} 57 | {-# SPECIALIZE formatBoundedSigned :: Int8 -> Builder #-} 58 | {-# SPECIALIZE formatBoundedSigned :: Int16 -> Builder #-} 59 | {-# SPECIALIZE formatBoundedSigned :: Int32 -> Builder #-} 60 | {-# SPECIALIZE formatBoundedSigned :: Int64 -> Builder #-} 61 | formatBoundedSigned i 62 | | i < 0 = minus <> 63 | if i == minBound 64 | then formatPositive (-(i `quot` 10)) <> digit (-(i `rem` 10)) 65 | else formatPositive (-i) 66 | | otherwise = formatPositive i 67 | 68 | formatPositive :: Integral a => a -> Builder 69 | {-# SPECIALIZE formatPositive :: Int -> Builder #-} 70 | {-# SPECIALIZE formatPositive :: Int8 -> Builder #-} 71 | {-# SPECIALIZE formatPositive :: Int16 -> Builder #-} 72 | {-# SPECIALIZE formatPositive :: Int32 -> Builder #-} 73 | {-# SPECIALIZE formatPositive :: Int64 -> Builder #-} 74 | {-# SPECIALIZE formatPositive :: Word -> Builder #-} 75 | {-# SPECIALIZE formatPositive :: Word8 -> Builder #-} 76 | {-# SPECIALIZE formatPositive :: Word16 -> Builder #-} 77 | {-# SPECIALIZE formatPositive :: Word32 -> Builder #-} 78 | {-# SPECIALIZE formatPositive :: Word64 -> Builder #-} 79 | formatPositive = go 80 | where go n | n < 10 = digit n 81 | | otherwise = go (n `quot` 10) <> digit (n `rem` 10) 82 | 83 | minus :: Builder 84 | minus = fromWord8 45 85 | 86 | zero :: Word8 87 | zero = 48 88 | 89 | digit :: Integral a => a -> Builder 90 | digit n = fromWord8 $! i2w (fromIntegral n) 91 | {-# INLINE digit #-} 92 | 93 | i2w :: Int -> Word8 94 | i2w i = zero + fromIntegral i 95 | {-# INLINE i2w #-} 96 | 97 | ------------------------------------------------------------------------ 98 | -- Floating point numbers 99 | 100 | realFloat :: RealFloat a => a -> B.ByteString 101 | {-# SPECIALIZE realFloat :: Float -> B.ByteString #-} 102 | {-# SPECIALIZE realFloat :: Double -> B.ByteString #-} 103 | realFloat = toByteString . 
formatRealFloat Generic 104 | 105 | -- | Control the rendering of floating point numbers. 106 | data FPFormat = Exponent 107 | -- ^ Scientific notation (e.g. @2.3e123@). 108 | | Fixed 109 | -- ^ Standard decimal notation. 110 | | Generic 111 | -- ^ Use decimal notation for values between @0.1@ and 112 | -- @9,999,999@, and scientific notation otherwise. 113 | deriving (Enum, Read, Show) 114 | 115 | formatRealFloat :: RealFloat a => FPFormat -> a -> Builder 116 | {-# SPECIALIZE formatRealFloat :: FPFormat -> Float -> Builder #-} 117 | {-# SPECIALIZE formatRealFloat :: FPFormat -> Double -> Builder #-} 118 | formatRealFloat fmt x 119 | | isNaN x = fromString "NaN" 120 | | isInfinite x = if x < 0 121 | then fromString "-Infinity" 122 | else fromString "Infinity" 123 | | x < 0 || isNegativeZero x = minus <> doFmt fmt (floatToDigits (-x)) 124 | | otherwise = doFmt fmt (floatToDigits x) 125 | where 126 | doFmt format (is, e) = 127 | let ds = map i2d is in 128 | case format of 129 | Generic -> 130 | doFmt (if e < 0 || e > 7 then Exponent else Fixed) 131 | (is,e) 132 | Exponent -> 133 | let show_e' = formatDecimal (e-1) in 134 | case ds of 135 | [48] -> fromString "0.0e0" 136 | [d] -> fromWord8 d <> fromString ".0e" <> show_e' 137 | (d:ds') -> fromWord8 d <> fromChar '.' <> fromWord8s ds' <> 138 | fromChar 'e' <> show_e' 139 | [] -> error "formatRealFloat/doFmt/Exponent: []" 140 | Fixed 141 | | e <= 0 -> fromString "0." <> 142 | fromByteString (B.replicate (-e) zero) <> 143 | fromWord8s ds 144 | | otherwise -> 145 | let 146 | f 0 s rs = mk0 (reverse s) <> fromChar '.' <> mk0 rs 147 | f n s [] = f (n-1) (zero:s) [] 148 | f n s (r:rs) = f (n-1) (r:s) rs 149 | in 150 | f e [] ds 151 | where mk0 ls = case ls of { [] -> fromWord8 zero ; _ -> fromWord8s ls} 152 | 153 | -- Based on "Printing Floating-Point Numbers Quickly and Accurately" 154 | -- by R.G. Burger and R.K. Dybvig in PLDI 96. 155 | -- This version uses a much slower logarithm estimator. It should be improved. 
-- | 'floatToDigits' takes a non-negative 'RealFloat' number and
-- returns its decimal digits and a base-10 exponent.  (Unlike GHC's
-- @Numeric.floatToDigits@ this version has no base parameter; the
-- output base is always 10.)  In particular, if @x>=0@, and
--
-- > floatToDigits x = ([d1,d2,...,dn], e)
--
-- then
--
-- (1) @n >= 1@
--
-- (2) @x = 0.d1d2...dn * (10**e)@
--
-- (3) @0 <= di <= 9@

floatToDigits :: (RealFloat a) => a -> ([Int], Int)
{-# SPECIALIZE floatToDigits :: Float -> ([Int], Int) #-}
{-# SPECIALIZE floatToDigits :: Double -> ([Int], Int) #-}
floatToDigits 0 = ([0], 0)
floatToDigits x =
    let
        (f0, e0)     = decodeFloat x
        (minExp0, _) = floatRange x
        p = floatDigits x
        b = floatRadix x
        minExp = minExp0 - p -- the real minimum exponent
        -- Haskell requires that f be adjusted so denormalized numbers
        -- will have an impossibly low exponent. Adjust for this.
        (f, e) =
            let n = minExp - e0 in
            if n > 0 then (f0 `quot` (expt b n), e0+n) else (f0, e0)
        (r, s, mUp, mDn) =
            if e >= 0 then
                let be = expt b e in
                if f == expt b (p-1) then
                    (f*be*b*2, 2*b, be*b, be) -- according to Burger and Dybvig
                else
                    (f*be*2, 2, be, be)
            else
                if e > minExp && f == expt b (p-1) then
                    (f*b*2, expt b (-e+1)*2, b, 1)
                else
                    (f*2, expt b (-e)*2, 1, 1)
        k :: Int
        k =
            let
                k0 :: Int
                k0 =
                    if b == 2 then
                        -- logBase 10 2 is very slightly larger than 8651/28738
                        -- (about 5.3558e-10), so if log x >= 0, the approximation
                        -- k1 is too small, hence we add one and need one fixup step less.
                        -- If log x < 0, the approximation errs rather on the high side.
                        -- That is usually more than compensated for by ignoring the
                        -- fractional part of logBase 2 x, but when x is a power of 1/2
                        -- or slightly larger and the exponent is a multiple of the
                        -- denominator of the rational approximation to logBase 10 2,
                        -- k1 is larger than logBase 10 x. If k1 > 1 + logBase 10 x,
                        -- we get a leading zero-digit we don't want.
                        -- With the approximation 3/10, this happened for
                        -- 0.5^1030, 0.5^1040, ..., 0.5^1070 and values close above.
                        -- The approximation 8651/28738 guarantees k1 < 1 + logBase 10 x
                        -- for IEEE-ish floating point types with exponent fields
                        -- <= 17 bits and mantissae of several thousand bits, earlier
                        -- convergents to logBase 10 2 would fail for long double.
                        -- Using quot instead of div is a little faster and requires
                        -- fewer fixup steps for negative lx.
                        let lx = p - 1 + e0
                            k1 = (lx * 8651) `quot` 28738
                        in if lx >= 0 then k1 + 1 else k1
                    else
                        -- f :: Integer, log :: Float -> Float,
                        -- ceiling :: Float -> Int
                        ceiling ((log (fromInteger (f+1) :: Float) +
                                  fromIntegral e * log (fromInteger b)) /
                                 log 10)

                fixup n =
                    if n >= 0 then
                        if r + mUp <= expt 10 n * s then n else fixup (n+1)
                    else
                        if expt 10 (-n) * (r + mUp) <= s then n else fixup (n+1)
            in
                fixup k0

        -- Burger-Dybvig digit generation: emit digits until the
        -- remainder is within the rounding margins, then round.
        gen ds rn sN mUpN mDnN =
            let
                (dn, rn') = (rn * 10) `quotRem` sN
                mUpN' = mUpN * 10
                mDnN' = mDnN * 10
            in
                case (rn' < mDnN', rn' + mUpN' > sN) of
                  (True,  False) -> dn : ds
                  (False, True)  -> dn+1 : ds
                  (True,  True)  -> if rn' * 2 < sN then dn : ds else dn+1 : ds
                  (False, False) -> gen (dn:ds) rn' sN mUpN' mDnN'

        rds =
            if k >= 0 then
                gen [] r (s * expt 10 k) mUp mDn
            else
                let bk = expt 10 (-k) in
                gen [] (r * bk) s (mUp * bk) (mDn * bk)
    in
        (map fromIntegral (reverse rds), k)

-- Exponentiation with a cache for the most common numbers.
minExpt, maxExpt :: Int
minExpt = 0
maxExpt = 1100

-- | @expt base n@ = @base ^ n@, served from a cache for base 2 and 10.
-- FIX: the base-10 branch previously omitted the @n >= minExpt@ lower
-- bound, so a negative @n@ would have reached 'unsafeAt' with a
-- negative index (unchecked out-of-bounds read).  All current callers
-- pass non-negative exponents, so behavior is unchanged for them.
expt :: Integer -> Int -> Integer
expt base n
    | base == 2  && n >= minExpt && n <= maxExpt   = expts `unsafeAt` n
    | base == 10 && n >= minExpt && n <= maxExpt10 = expts10 `unsafeAt` n
    | otherwise                                    = base^n

expts :: Array Int Integer
expts = array (minExpt,maxExpt) [(n,2^n) | n <- [minExpt .. maxExpt]]

maxExpt10 :: Int
maxExpt10 = 324

expts10 :: Array Int Integer
expts10 = array (minExpt,maxExpt10) [(n,10^n) | n <- [minExpt .. maxExpt10]]

-- | Unsafe conversion for decimal digits.
{-# INLINE i2d #-}
i2d :: Int -> Word8
i2d i = fromIntegral (ord '0' + i)
--------------------------------------------------------------------------------
-- /src/Data/CSV/Conduit/Monoid.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}

-- | Compatibility shim: re-exports '<>' and, for base < 4.5 (which
-- predates it), defines it as 'mappend'.
module Data.CSV.Conduit.Monoid
    ( (<>)
    ) where

import Data.Monoid

#if !MIN_VERSION_base(4,5,0)
infixr 6 <>
-- | An infix synonym for 'mappend'.
(<>) :: Monoid m => m -> m -> m
(<>) = mappend
{-# INLINE (<>) #-}
#endif
--------------------------------------------------------------------------------
-- /src/Data/CSV/Conduit/Parser/ByteString.hs:
--------------------------------------------------------------------------------
{-|

This module exports the underlying Attoparsec row parser. This is helpful if
you want to do some ad-hoc CSV string parsing.

-}

module Data.CSV.Conduit.Parser.ByteString
    ( parseCSV
    , parseRow
    , row
    , csv
    ) where

-------------------------------------------------------------------------------
import           Control.Applicative
import           Control.Monad                    (mzero)
import           Data.Attoparsec.ByteString       as P hiding (take)
import qualified Data.Attoparsec.ByteString.Char8 as C8
import           Data.ByteString.Char8            (ByteString)
import qualified Data.ByteString.Char8            as B8
import           Data.Word                        (Word8)
-------------------------------------------------------------------------------
import           Data.CSV.Conduit.Types


------------------------------------------------------------------------------
-- | Try to parse given string as CSV
parseCSV :: CSVSettings -> ByteString -> Either String [Row ByteString]
parseCSV s = parseOnly $ csv s


------------------------------------------------------------------------------
-- | Try to parse given string as 'Row ByteString'
parseRow :: CSVSettings -> ByteString -> Either String (Maybe (Row ByteString))
parseRow s = parseOnly $ row s


------------------------------------------------------------------------------
-- | Parse CSV: consume rows until end of input, dropping rows that
-- came back as 'Nothing' (unparseable lines).
csv :: CSVSettings -> Parser [Row ByteString]
csv s = do
    mrow <- row s
    let prepend = maybe id (:) mrow
    done <- atEnd
    if done
      then return (prepend [])
      else prepend <$> csv s


------------------------------------------------------------------------------
-- | Parse a single CSV row; yields 'Nothing' for a malformed line.
row :: CSVSettings -> Parser (Maybe (Row ByteString))
row csvs = csvrow csvs <|> badrow

-- | Accept LF, CRLF or a lone CR as the line terminator.
csvEndOfLine :: Parser ()
csvEndOfLine =
        (word8 10 >> return ())
    <|> ((word8 13 *> word8 10) >> return ())
    <|> (word8 13 >> return ())

-- | Skip everything up to (and including) the end of the line and
-- report the row as unparseable.
badrow :: Parser (Maybe (Row ByteString))
badrow = P.takeWhile (not . C8.isEndOfLine) *>
         (csvEndOfLine <|> C8.endOfInput) *> return Nothing

-- | A well-formed row: quoted or plain fields separated by the
-- configured separator, terminated by end of line or input.
csvrow :: CSVSettings -> Parser (Maybe (Row ByteString))
csvrow c =
    let rowbody      = (quotedField' <|> field c) `sepBy` C8.char (csvSep c)
        properrow    = rowbody <* (csvEndOfLine <|> P.endOfInput)
        quotedField' = case csvQuoteChar c of
            Nothing -> mzero
            Just q' -> try (quotedField q')
    in Just <$> properrow

-- | An unquoted field: everything up to a separator, quote or newline.
field :: CSVSettings -> Parser ByteString
field s = P.takeWhile (isFieldChar s)

isFieldChar :: CSVSettings -> Word8 -> Bool
isFieldChar s = notInClass xs'
  where
    xs  = csvSep s : "\n\r"
    xs' = maybe xs (: xs) (csvQuoteChar s)

-- | A quoted field; a doubled quote char inside stands for a literal
-- quote.
quotedField :: Char -> Parser ByteString
quotedField c = do
    let escapedQuote = string dbl *> return c
        dbl          = B8.pack [c, c]
    _ <- C8.char c
    f <- many (C8.notChar c <|> escapedQuote)
    _ <- C8.char c
    return $ B8.pack f
--------------------------------------------------------------------------------
-- /src/Data/CSV/Conduit/Parser/Text.hs:
--------------------------------------------------------------------------------
{-|

This module exports the underlying Attoparsec row parser. This is helpful if
you want to do some ad-hoc CSV string parsing.

-}

module Data.CSV.Conduit.Parser.Text
    ( parseCSV
    , parseRow
    , row
    , csv
    ) where

-------------------------------------------------------------------------------
import           Control.Applicative
import           Control.Monad        (mzero)
import           Data.Attoparsec.Text as P hiding (take)
import qualified Data.Attoparsec.Text as T
import           Data.Text            (Text)
import qualified Data.Text            as T
-------------------------------------------------------------------------------
import           Data.CSV.Conduit.Types
-------------------------------------------------------------------------------


------------------------------------------------------------------------------
-- | Try to parse given string as CSV
parseCSV :: CSVSettings -> Text -> Either String [Row Text]
parseCSV s = parseOnly $ csv s


------------------------------------------------------------------------------
-- | Try to parse given string as 'Row Text'
parseRow :: CSVSettings -> Text -> Either String (Maybe (Row Text))
parseRow s = parseOnly $ row s


------------------------------------------------------------------------------
-- | Parse CSV: consume rows until end of input, dropping rows that
-- came back as 'Nothing' (unparseable lines).
csv :: CSVSettings -> Parser [Row Text]
csv s = do
    mrow <- row s
    let prepend = maybe id (:) mrow
    done <- atEnd
    if done
      then return (prepend [])
      else prepend <$> csv s


------------------------------------------------------------------------------
-- | Parse a single CSV row; yields 'Nothing' for a malformed line.
row :: CSVSettings -> Parser (Maybe (Row Text))
row csvs = csvrow csvs <|> badrow

-- | Accept LF, CRLF or a lone CR as the line terminator.
csvEndOfLine :: Parser ()
csvEndOfLine =
        (char '\n' >> return ())
    <|> (string (T.pack "\r\n") >> return ())
    <|> (char '\r' >> return ())

-- | Skip everything up to (and including) the end of the line and
-- report the row as unparseable.
badrow :: Parser (Maybe (Row Text))
badrow = P.takeWhile (not . T.isEndOfLine) *>
         (csvEndOfLine <|> T.endOfInput) *> return Nothing

-- | A well-formed row: quoted or plain fields separated by the
-- configured separator, terminated by end of line or input.
csvrow :: CSVSettings -> Parser (Maybe (Row Text))
csvrow c =
    let rowbody      = (quotedField' <|> field c) `sepBy` T.char (csvSep c)
        properrow    = rowbody <* (csvEndOfLine <|> P.endOfInput)
        quotedField' = case csvQuoteChar c of
            Nothing -> mzero
            Just q' -> try (quotedField q')
    in Just <$> properrow

-- | An unquoted field: everything up to a separator, quote or newline.
field :: CSVSettings -> Parser Text
field s = P.takeWhile (isFieldChar s)

isFieldChar :: CSVSettings -> Char -> Bool
isFieldChar s = notInClass xs'
  where
    xs  = csvSep s : "\n\r"
    xs' = maybe xs (: xs) (csvQuoteChar s)

-- | A quoted field; a doubled quote char inside stands for a literal
-- quote.
quotedField :: Char -> Parser Text
quotedField c = do
    let escapedQuote = string dbl *> return c
        dbl          = T.pack [c, c]
    _ <- T.char c
    f <- many (T.notChar c <|> escapedQuote)
    _ <- T.char c
    return $ T.pack f


--------------------------------------------------------------------------------
-- /src/Data/CSV/Conduit/Types.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings    #-}
{-# LANGUAGE TypeSynonymInstances #-}

module Data.CSV.Conduit.Types where

-------------------------------------------------------------------------------
import           Data.Default
import qualified Data.Map         as M
import qualified Data.Map.Ordered as MO
-------------------------------------------------------------------------------

-- | Whether an empty field should still be wrapped in quote
-- characters on output.
data QuoteEmpty = DoQuoteEmpty | DontQuoteEmpty deriving (Show, Eq)

-------------------------------------------------------------------------------
-- | Settings for a CSV file. This library is intended to be flexible
-- and offer a way to process the majority of text data files out
-- there.
data CSVSettings = CSVSettings
    { -- | Separator character to be used in between fields
      csvSep               :: !Char

      -- | Quote character that may sometimes be present around fields.
      -- If 'Nothing' is given, the library will never expect quotation
      -- even if it is present.
    , csvQuoteCharAndStyle :: !(Maybe (Char, QuoteEmpty))
    } deriving (Show, Eq)


-- | The quote character alone, without the empty-field policy.
csvQuoteChar :: CSVSettings -> Maybe Char
csvQuoteChar = (fst <$>) . csvQuoteCharAndStyle

-------------------------------------------------------------------------------
-- | Default settings for a CSV file.
--
-- > csvSep               = ','
-- > csvQuoteCharAndStyle = Just ('"', DoQuoteEmpty)
--
defCSVSettings :: CSVSettings
defCSVSettings = CSVSettings
    { csvSep               = ','
    , csvQuoteCharAndStyle = Just ('"', DoQuoteEmpty)
    }

-- | Like 'defCSVSettings', but empty fields are written without
-- quotes.
defDontQuoteEmptyCSVSettings :: CSVSettings
defDontQuoteEmptyCSVSettings = CSVSettings
    { csvSep               = ','
    , csvQuoteCharAndStyle = Just ('"', DontQuoteEmpty)
    }

instance Default CSVSettings where
    def = defCSVSettings

-------------------------------------------------------------------------------
-- | A 'Row' is just a list of fields
type Row a = [a]

-------------------------------------------------------------------------------
-- | A 'MapRow' is a dictionary based on 'Data.Map' where column names
-- are keys and row's individual cell values are the values of the
-- 'Map'.
type MapRow a = M.Map a a

-- | An 'OrderedMapRow' is a dictionary based on 'Data.Map.Ordered' where column
-- names are keys and row's individual cell values are the values of the 'OMap'.
-- Unlike 'MapRow', 'OrderedMapRow' preserves the insertion ordering of columns.
-- 'OrderedMapRow' is a reasonable default in most cases.
type OrderedMapRow a = MO.OMap a a
--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
resolver: lts-22.34
packages:
  - .
flags:
  csv-conduit:
    lib-Werror: true
extra-package-dbs: []
--------------------------------------------------------------------------------
/test/AdHoc.hs:
--------------------------------------------------------------------------------
module Main where


import qualified Data.ByteString.Char8 as B
import           Data.Conduit
import           Data.Conduit.Binary
import           Data.CSV.Conduit
import           Data.Map              ((!))
import           System.Directory
import           System.Environment


-------------------------------------------------------------------------------
-- Ad-hoc driver: stream CSV file (first arg) to a new file (second
-- arg) through the identity row transform.
main = do
    fi : fo : _ <- getArgs
    runResourceT $ mapCSVFile defCSVSettings f fi fo
    -- runResourceT $ sourceFile fi $$ sinkFile fo


f :: Row B.ByteString -> [Row B.ByteString]
f = return
--------------------------------------------------------------------------------
-- /test/Test.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE ScopedTypeVariables   #-}

module Main where

import qualified Conduit                        as C
import           Control.Exception
import qualified Data.ByteString.Char8          as B
import           Data.CSV.Conduit
import           Data.CSV.Conduit.Conversion
import           Data.CSV.Conduit.Types
import qualified Data.Map                       as Map
import qualified Data.Map.Ordered               as OMap
import           Data.Monoid                    as M
import           Data.Text                      (Text)
import qualified Data.Vector                    as V
import           System.Directory
import           Test.Framework                 (Test, defaultMain, testGroup)
import           Test.Framework.Providers.HUnit
import           Test.HUnit                     (assertFailure, (@=?),
(@?=)) 22 | 23 | main :: IO () 24 | main = defaultMain tests 25 | 26 | tests :: [Test] 27 | tests = 28 | [ testGroup "Basic Ops" baseTests, 29 | testGroup "decodeCSV" decodeCSVTests 30 | ] 31 | 32 | baseTests :: [Test] 33 | baseTests = 34 | [ testCase "mapping with id works" test_identityMap, 35 | testCase "simple parsing works" (test_simpleParse testFile1), 36 | testCase "simple parsing works for Mac-Excel" (test_simpleParse testFile3), 37 | testCase "simple parsing works for Windows-Excel" (test_simpleParse testFile4), 38 | testCase "fails parsing gracefully" test_parseFail, 39 | testCase "OrderedMap" test_orderedMap 40 | ] 41 | 42 | decodeCSVTests :: [Test] 43 | decodeCSVTests = 44 | [ testCase "parses a CSV" $ do 45 | let efoos = decodeCSV defCSVSettings ("Foo\nfoo" :: B.ByteString) 46 | case efoos :: Either SomeException (V.Vector (Named Foo)) of 47 | Left e -> assertFailure (show e) 48 | Right foos -> V.fromList [Named Foo] @=? foos, 49 | testCase "eats parse errors, evidently" $ do 50 | let efoos = decodeCSV defCSVSettings ("Foo\nbad" :: B.ByteString) 51 | case efoos :: Either SomeException (V.Vector (Named Foo)) of 52 | Left e -> assertFailure (show e) 53 | Right foos -> M.mempty @=? foos 54 | ] 55 | 56 | data Foo = Foo deriving (Show, Eq) 57 | 58 | instance FromNamedRecord Foo where 59 | parseNamedRecord nr = do 60 | s <- nr .: "Foo" 61 | case s of 62 | "foo" -> pure Foo 63 | _ -> fail ("Expected \"foo\" but got " <> B.unpack s) 64 | 65 | instance ToNamedRecord Foo where 66 | toNamedRecord Foo = namedRecord ["Foo" .= ("foo" :: B.ByteString)] 67 | 68 | test_identityMap :: IO () 69 | test_identityMap = do 70 | _ <- runResourceT $ mapCSVFile csvSettings f testFile2 outFile 71 | f1 <- readFile testFile2 72 | f2 <- readFile outFile 73 | f1 @=? 
f2 74 | removeFile outFile 75 | where 76 | outFile = "test/testOut.csv" 77 | f :: Row Text -> [Row Text] 78 | f = return 79 | 80 | test_simpleParse :: FilePath -> IO () 81 | test_simpleParse fp = do 82 | (d :: V.Vector (MapRow B.ByteString)) <- readCSVFile csvSettings fp 83 | V.mapM_ assertRow d 84 | where 85 | assertRow r = v3 @=? (v1 + v2) 86 | where 87 | v1 = readBS $ r Map.! "Col2" 88 | v2 = readBS $ r Map.! "Col3" 89 | v3 = readBS $ r Map.! "Sum" 90 | 91 | test_parseFail :: IO () 92 | test_parseFail = do 93 | (d :: V.Vector (MapRow B.ByteString)) <- readCSVFile csvSettings testXLS 94 | errored <- catch (V.mapM_ assertRow d >> pure False) handler 95 | if errored then pure () else assertFailure "readCSVFile shouldn't read XLS" 96 | where 97 | handler :: ErrorCall -> IO Bool 98 | handler _ = pure True 99 | assertRow r = v3 @=? (v1 + v2) 100 | where 101 | v1 = readBS $ r Map.! "Col2" 102 | v2 = readBS $ r Map.! "Col3" 103 | v3 = readBS $ r Map.! "Sum" 104 | 105 | test_orderedMap :: IO () 106 | test_orderedMap = do 107 | unorderedRes <- 108 | C.runConduit $ 109 | C.yieldMany [unorderedRow] 110 | C..| writeHeaders defCSVSettings 111 | C..| C.foldC 112 | unorderedRes @?= ("\"a\",\"b\"\n\"aval\",\"bval\"\n" :: B.ByteString) 113 | orderedRes <- 114 | C.runConduit $ 115 | C.yieldMany [orderedRow] 116 | C..| writeHeadersOrdered defCSVSettings 117 | C..| C.foldC 118 | orderedRes @?= ("\"b\",\"a\"\n\"bval\",\"aval\"\n" :: B.ByteString) 119 | where 120 | orderedRow :: OrderedMapRow Text 121 | orderedRow = OMap.fromList pairs 122 | unorderedRow :: MapRow Text 123 | unorderedRow = Map.fromList pairs 124 | pairs = [("b", "bval"), ("a", "aval")] 125 | 126 | csvSettings :: CSVSettings 127 | csvSettings = defCSVSettings {csvQuoteCharAndStyle = Just ('`', DontQuoteEmpty)} 128 | 129 | testFile1, testFile2, testFile3, testFile4 :: FilePath 130 | testFile1 = "test/test.csv" 131 | testFile2 = "test/test.csv" 132 | testFile3 = "test/test-mac-excel.csv" 133 | testFile4 = 
"test/test-windows-excel.csv" 134 | 135 | testXLS :: FilePath 136 | testXLS = "test/test.xls" 137 | 138 | readBS :: B.ByteString -> Int 139 | readBS = read . B.unpack 140 | -------------------------------------------------------------------------------- /test/test-mac-excel.csv: -------------------------------------------------------------------------------- 1 | `Col1`,`Col2`,`Col3`,`Sum` `A`,`2`,`3`,`5` `B`,`3`,`4`,`7` `Field using the quote char ``this is the in-quoted value```,`4`,`5`,`9` -------------------------------------------------------------------------------- /test/test-windows-excel.csv: -------------------------------------------------------------------------------- 1 | Col1,Col2,Col3,Sum 2 | A,2,3,5 3 | B,3,4,7 4 | "Field using the quote char ""this is the in-quoted value""",4,5,9 5 | -------------------------------------------------------------------------------- /test/test.csv: -------------------------------------------------------------------------------- 1 | `Col1`,`Col2`,`Col3`,`Sum` 2 | `A`,`2`,`3`,`5` 3 | `B`,`3`,`4`,`7` 4 | `Field using the quote char ``this is the in-quoted value```,`4`,`5`,`9` 5 | -------------------------------------------------------------------------------- /test/test.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozataman/csv-conduit/5656585611b06f8a97cea9570fb35dba910e9b48/test/test.xls --------------------------------------------------------------------------------