├── .cspell-base.json ├── .cspell.json ├── .gitattributes ├── .github └── workflows │ ├── virmator-prerelease.yml │ ├── virmator-tagged-release.yml │ └── virmator-tests.yml ├── .gitignore ├── .npmignore ├── .prettierignore ├── .prettierrc.js ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── bulk-sanitize.sh ├── files ├── downloads │ └── .gitignore ├── example-dir │ ├── README.md │ ├── example-sub-dir-2 │ │ └── sub-2-nothing │ ├── example-sub-dir │ │ ├── example-sub-sub-dir │ │ │ └── sub-sub-nothing │ │ ├── sub-nothing │ │ └── sub-nothing-2 │ └── top-nothing ├── sample-files │ ├── dummy.pdf │ ├── pdfkit-out.pdf │ └── sanitized │ │ ├── chase-prime-visa-credit │ │ ├── basic-2.json │ │ ├── basic-3.json │ │ ├── basic.json │ │ ├── newer.json │ │ ├── no-payments.json │ │ └── short.json │ │ ├── citi-costco-visa-credit │ │ ├── basic-2.json │ │ ├── basic.json │ │ ├── no-payments.json │ │ └── post-date.json │ │ ├── paypal │ │ ├── basic.json │ │ ├── big-expense.json │ │ ├── long.json │ │ └── nothing.json │ │ ├── usaa-bank │ │ └── basic.json │ │ └── usaa-visa-credit │ │ ├── basic.json │ │ ├── long-payments.json │ │ ├── multi-line-description.json │ │ ├── short-payments.json │ │ ├── spacing.json │ │ └── two-cards.json └── temp-output │ └── .gitignore ├── package-lock.json ├── package.json ├── src ├── augments │ └── date.ts ├── bash-scripting.ts ├── global.ts ├── index.ts ├── package-contents.test.ts ├── package-version.ts ├── parser │ ├── all-parsers.ts │ ├── implemented-parsers │ │ ├── chase-prime-visa-credit-card-parser.ts │ │ ├── citi-costco-visa-credit-card-parser.ts │ │ ├── example-parser.ts │ │ ├── paypal-parser.ts │ │ ├── usaa-bank-account-parser.ts │ │ └── usaa-visa-credit-card-parser.ts │ ├── parse-api.ts │ ├── parsed-output.ts │ ├── parser-function.ts │ ├── parser-options.ts │ ├── parser-state-machine.ts │ └── statement-parser.ts ├── pdf │ ├── generate-pdf.test.ts │ ├── generate-pdf.ts │ ├── read-pdf.test.ts │ └── read-pdf.ts ├── readme-examples │ ├── 
all-options.example.ts │ ├── api-simple-parse-inputs.example.ts │ ├── api-simple-parse.example.ts │ ├── better-async.example.ts │ ├── direct-parsing.example.ts │ ├── direct-text-parsing.example.ts │ └── parser-type.example.ts ├── repo-paths.test.ts ├── repo-paths.ts └── sanitizer │ ├── sanitize-for-test-file-cli.test.ts │ ├── sanitize-for-test-file-cli.ts │ ├── sanitized-files.test.ts │ ├── sanitized-test.test.ts │ ├── sanitized-test.ts │ ├── sanitizer.test.ts │ └── sanitizer.ts ├── test-specific-file.sh └── tsconfig.json /.cspell-base.json: -------------------------------------------------------------------------------- 1 | { 2 | "ignorePaths": [ 3 | ".DS_Store", 4 | ".git/", 5 | "dist/", 6 | "node_modules/", 7 | "package-lock.json", 8 | ".cspell-base.json", 9 | ".cspell.json" 10 | ], 11 | "version": "0.2", 12 | "words": [ 13 | "accum", 14 | "adipiscing", 15 | "amet", 16 | "andymckay", 17 | "argb", 18 | "autofix", 19 | "autofixable", 20 | "autofixing", 21 | "config's", 22 | "consectetur", 23 | "cpsell", 24 | "derp", 25 | "derpy", 26 | "electrovir", 27 | "entrypoints", 28 | "esbenp", 29 | "gitattributes", 30 | "globstar", 31 | "locationchange", 32 | "marvinpinto", 33 | "nojekyll", 34 | "npmignore", 35 | "packagejson", 36 | "parens", 37 | "pdfs", 38 | "prettierignore", 39 | "prettierrc", 40 | "pushstate", 41 | "replacestate", 42 | "repo's", 43 | "repos", 44 | "rgba", 45 | "stylelint", 46 | "stylelintrc", 47 | "subcommand", 48 | "thingie", 49 | "threejs", 50 | "transpiled", 51 | "transpiling", 52 | "typeof", 53 | "virmator's", 54 | "virmator" 55 | ] 56 | } 57 | -------------------------------------------------------------------------------- /.cspell.json: -------------------------------------------------------------------------------- 1 | { 2 | "import": ".cspell-base.json", 3 | "words": ["adipiscing", "amet", "transpiled", "citi", "consectetur", "usaa", "pdfs"] 4 | } 5 | -------------------------------------------------------------------------------- 
/.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf -------------------------------------------------------------------------------- /.github/workflows/virmator-prerelease.yml: -------------------------------------------------------------------------------- 1 | name: branch-prerelease 2 | 3 | on: 4 | workflow_run: 5 | workflows: ['tests'] 6 | branches: '**' 7 | types: 8 | - completed 9 | 10 | jobs: 11 | package-release: 12 | name: 'package release' 13 | runs-on: 'ubuntu-latest' 14 | if: ${{ github.event.workflow_run.conclusion == 'success' }} 15 | steps: 16 | - uses: actions/checkout@v1 17 | - name: Use Node.js 14.x 18 | uses: actions/setup-node@v1 19 | with: 20 | node-version: 14.x 21 | - name: run test 22 | run: | 23 | npm ci 24 | npm run test:full 25 | - name: pack and set vars 26 | id: vars 27 | run: | 28 | branchName="${GITHUB_REF#refs/*/}" 29 | echo ::set-output name=short_ref::"$branchName" 30 | 31 | originalTarName="$(npm pack)" 32 | packageName="$(npm -s run env echo '$npm_package_name')" 33 | timeStamp="$(date +%s)" 34 | newTarName="$packageName-$timeStamp.tgz" 35 | mv "$originalTarName" "$newTarName" 36 | echo ::set-output name=tar_name::"$newTarName" 37 | - uses: 'marvinpinto/action-automatic-releases@latest' 38 | with: 39 | repo_token: '${{ secrets.GITHUB_TOKEN }}' 40 | automatic_release_tag: 'prerelease-${{ steps.vars.outputs.short_ref }}' 41 | prerelease: true 42 | files: | 43 | ${{ steps.vars.outputs.tar_name }} 44 | -------------------------------------------------------------------------------- /.github/workflows/virmator-tagged-release.yml: -------------------------------------------------------------------------------- 1 | name: 'tagged-release' 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | tagged-release: 10 | name: 'Tagged Release' 11 | runs-on: 'ubuntu-latest' 12 | 13 | steps: 14 | - uses: actions/checkout@v1 15 | - name: Use Node.js 14.x 16 | uses: 
actions/setup-node@v1 17 | with: 18 | node-version: 14.x 19 | - name: run test 20 | run: | 21 | npm ci 22 | npm run test:full 23 | 24 | - uses: 'marvinpinto/action-automatic-releases@latest' 25 | with: 26 | repo_token: '${{ secrets.GITHUB_TOKEN }}' 27 | prerelease: false 28 | -------------------------------------------------------------------------------- /.github/workflows/virmator-tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | 8 | jobs: 9 | build: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | os: [ubuntu-latest, windows-latest, macos-latest] 15 | node-version: [12.x, 14.x] 16 | steps: 17 | - uses: actions/checkout@v1 18 | - name: Use Node.js ${{ matrix.node-version }} 19 | uses: actions/setup-node@v1 20 | with: 21 | node-version: ${{ matrix.node-version }} 22 | - name: run test 23 | run: | 24 | npm ci 25 | npm run test:full 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | .cspell-base.json 2 | .cspell.json 3 | .gitattributes 4 | .github/ 5 | .gitignore 6 | .prettierignore 7 | .prettierrc.js 8 | .vscode/ 9 | *.test.d.ts 10 | *.test.js 11 | dist/test/ 12 | scripts/ 13 | snowpack.config.js 14 | snowpack.config.mjs 15 | src/ 16 | static/ 17 | test-repos/ 18 | tsconfig-base.json 19 | tsconfig.json 20 | 21 | files/ 22 | dist/sanitizer/ 23 | repo-paths* 24 | *.sh 25 | dist/readme* 26 | dist/package-version* 27 | dist/bash* -------------------------------------------------------------------------------- /.prettierignore: 
-------------------------------------------------------------------------------- 1 | dist/ 2 | node_modules/ 3 | package-lock.json 4 | files/ -------------------------------------------------------------------------------- /.prettierrc.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const path = require('path'); 3 | 4 | const plugins = [ 5 | 'prettier-plugin-sort-json', 6 | 'prettier-plugin-packagejson', 7 | 'prettier-plugin-organize-imports', 8 | 'prettier-plugin-jsdoc', 9 | ].map((pluginName) => { 10 | const defaultPath = `./node_modules/${pluginName}`; 11 | if (fs.existsSync(path.resolve(__dirname, defaultPath))) { 12 | return defaultPath; 13 | } else { 14 | return `./node_modules/virmator/node_modules/${pluginName}`; 15 | } 16 | }); 17 | 18 | module.exports = { 19 | arrowParens: 'always', 20 | bracketSpacing: false, 21 | endOfLine: 'lf', 22 | htmlWhitespaceSensitivity: 'ignore', 23 | jsonRecursiveSort: true, 24 | bracketSameLine: false, 25 | plugins, 26 | printWidth: 100, 27 | singleQuote: true, 28 | tabWidth: 4, 29 | trailingComma: 'all', 30 | }; 31 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[html]": { 3 | "editor.defaultFormatter": "esbenp.prettier-vscode" 4 | }, 5 | "[javascript]": { 6 | "editor.defaultFormatter": "esbenp.prettier-vscode" 7 | }, 8 | "[json]": { 9 | "editor.defaultFormatter": "esbenp.prettier-vscode" 10 | }, 11 | "[jsonc]": { 12 | "editor.defaultFormatter": "esbenp.prettier-vscode" 13 | }, 14 | "[markdown]": { 15 | "editor.defaultFormatter": "esbenp.prettier-vscode" 16 | }, 17 | "[scss]": { 18 | "editor.defaultFormatter": "esbenp.prettier-vscode" 19 | }, 20 | "[typescript]": { 21 | "editor.defaultFormatter": "esbenp.prettier-vscode" 22 | }, 23 | "[yaml]": { 24 | "editor.defaultFormatter": "esbenp.prettier-vscode" 25 | 
}, 26 | "editor.formatOnSave": true, 27 | "editor.rulers": [100], 28 | "typescript.tsdk": "node_modules/typescript/lib" 29 | } 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 electrovir 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /bulk-sanitize.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e; 3 | 4 | # example usage for all downloads: ./bulk-sanitize.sh files/downloads 5 | # example usage for just paypal: ./bulk-sanitize.sh files/downloads paypal 6 | # example usage for just paypal with debug: ./bulk-sanitize.sh files/downloads paypal --debug 7 | # example usage for all downloads with debug: ./bulk-sanitize.sh files/downloads --debug 8 | 9 | searchDir="$1" 10 | 11 | debug="" 12 | if [[ "$*" == *"--debug"* ]]; then 13 | debug="--debug" 14 | fi 15 | 16 | specificParser="" 17 | # pick the parser name from the arguments; string-compare (==, not the integer -eq) to find where --debug sits 18 | if [[ "$#" -gt 1 && -z "$debug" ]]; then 19 | specificParser="$2" 20 | elif [[ "$#" -gt 2 && -n "$debug" ]]; then 21 | if [[ "${*: -1:1}" == "--debug" ]]; then 22 | # if the last argument is --debug then use the second to last argument for specificParser 23 | specificParser="${*: -2:1}" 24 | elif [[ "${*: -2:1}" == "--debug" ]]; then 25 | # if the second to last argument is --debug then use the last argument for specificParser 26 | specificParser="${*: -1:1}" 27 | else 28 | echo "Where did you put the --debug flag?" 29 | exit 1 30 | fi 31 | fi 32 | 33 | echo "search in: $searchDir" 34 | echo "parser: $specificParser" 35 | echo "debug: $debug" 36 | 37 | if [ -z "$searchDir" ]; then 38 | echo "Search dir is missing." 
39 | exit 1; 40 | fi 41 | 42 | npm run compile 43 | 44 | if [ -z "$specificParser" ]; then 45 | for filePath in "$searchDir"/**/*.pdf; do 46 | parserType="$(basename "$(dirname "$filePath")")" 47 | npm run sanitize:no-compile "$parserType" "$filePath" "$(basename "$filePath" .pdf).json" -- "$debug" 48 | done 49 | else 50 | for filePath in "$searchDir"/"$specificParser"/*.pdf; do 51 | npm run sanitize:no-compile "$specificParser" "$filePath" "$(basename "$filePath" .pdf).json" -- "$debug" 52 | done 53 | fi -------------------------------------------------------------------------------- /files/downloads/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /files/example-dir/README.md: -------------------------------------------------------------------------------- 1 | This directory is used for testing the `getAllRecursiveFiles` function. 2 | -------------------------------------------------------------------------------- /files/example-dir/example-sub-dir-2/sub-2-nothing: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electrovir/statement-parser/74b1adc2b07a28d9b64485a69cf72d8d4b3de53d/files/example-dir/example-sub-dir-2/sub-2-nothing -------------------------------------------------------------------------------- /files/example-dir/example-sub-dir/example-sub-sub-dir/sub-sub-nothing: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electrovir/statement-parser/74b1adc2b07a28d9b64485a69cf72d8d4b3de53d/files/example-dir/example-sub-dir/example-sub-sub-dir/sub-sub-nothing -------------------------------------------------------------------------------- /files/example-dir/example-sub-dir/sub-nothing: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/electrovir/statement-parser/74b1adc2b07a28d9b64485a69cf72d8d4b3de53d/files/example-dir/example-sub-dir/sub-nothing -------------------------------------------------------------------------------- /files/example-dir/example-sub-dir/sub-nothing-2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electrovir/statement-parser/74b1adc2b07a28d9b64485a69cf72d8d4b3de53d/files/example-dir/example-sub-dir/sub-nothing-2 -------------------------------------------------------------------------------- /files/example-dir/top-nothing: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electrovir/statement-parser/74b1adc2b07a28d9b64485a69cf72d8d4b3de53d/files/example-dir/top-nothing -------------------------------------------------------------------------------- /files/sample-files/dummy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electrovir/statement-parser/74b1adc2b07a28d9b64485a69cf72d8d4b3de53d/files/sample-files/dummy.pdf -------------------------------------------------------------------------------- /files/sample-files/pdfkit-out.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electrovir/statement-parser/74b1adc2b07a28d9b64485a69cf72d8d4b3de53d/files/sample-files/pdfkit-out.pdf -------------------------------------------------------------------------------- /files/sample-files/sanitized/chase-prime-visa-credit/basic-2.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/basic-2.json", 3 | "output": { 4 | "accountSuffix": "5", 5 | "endDate": "2001-08-12T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 4444.44, 9 | "date": "2007-02-03T00:00:00.000Z", 
10 | "description": "l m/o", 11 | "originalText": ["2/3 l m/o 4,444.44"] 12 | }, 13 | { 14 | "amount": 1, 15 | "date": "2007-08-09T00:00:00.000Z", 16 | "description": "t u/w", 17 | "originalText": ["8/9 t u/w 1"] 18 | } 19 | ], 20 | "incomes": [ 21 | { 22 | "amount": -1, 23 | "date": "2007-08-09T00:00:00.000Z", 24 | "description": "f g-h", 25 | "originalText": ["8/9 f g-h -1"] 26 | } 27 | ], 28 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/basic-2.json", 29 | "startDate": "2007-05-03T00:00:00.000Z", 30 | "yearPrefix": 20 31 | }, 32 | "packageVersion": "2.0.0", 33 | "parserType": "chase-prime-visa-credit", 34 | "text": [ 35 | "c", 36 | "", 37 | " e", 38 | "i", 39 | "m", 40 | "n p", 41 | "q s", 42 | "1-2-3-4", 43 | "t ", 44 | "u ", 45 | "v/w", 46 | "z", 47 | "c", 48 | "d-e", 49 | "", 50 | "g", 51 | "h 5", 52 | "j", 53 | "$6", 54 | "q", 55 | "t 7", 56 | "w", 57 | "c 8", 58 | "9 1 2 3 4 5 6", 59 | "k 7", 60 | "$8,888.88", 61 | "9 1 2 3 4 5 6", 62 | "q 7", 63 | "t", 64 | "y 8", 65 | "9 1 2 3 4 5 6", 66 | "7/8/9", 67 | "d 1", 68 | "2 3 4 5 6 7 8", 69 | "k 9", 70 | "- p 1", 71 | "2 3 4 5 6 7 8", 72 | "9 1 2 3 4 5 6 t", 73 | "u 7", 74 | "x e", 75 | "p", 76 | "e", 77 | "n $8 q", 78 | "s $9", 79 | "x 1 d", 80 | "o", 81 | "v w/x", 82 | "a", 83 | "n", 84 | "account number: 2 3 4 5", 85 | "y", 86 | "a $6", 87 | "k", 88 | "m -$7", 89 | "v", 90 | "g", 91 | "h i$8,888.88", 92 | "r", 93 | "t $9", 94 | "e", 95 | "g $1", 96 | "r", 97 | "a", 98 | "c $2", 99 | "o", 100 | "q $3", 101 | "y", 102 | "a $4,444.44 b", 103 | "opening/closing date 5/6/7 - 8/9/1", 104 | "p", 105 | "s $2", 106 | "c", 107 | "m", 108 | "o $3", 109 | "w", 110 | "z $4", 111 | "5-6-7-8,888.88 ", 112 | "c $9", 113 | "f $1", 114 | "l $2", 115 | "", 116 | "3 n 4 o 5 6 7/8/9 p 1 q 2 3 s 4 ", 117 | "5 ", 118 | "", 119 | "6", 120 | "", 121 | "u 7", 122 | "x 8/9/1", 123 | "c", 124 | " e 2-3", 125 | "g $4,444.44", 126 | "i j/k", 127 | "o", 128 | "q $5", 129 | "account number: 6 7 8 9", 130 | "", 
131 | "$", 132 | "u", 133 | "v/e", 134 | "1 f 2 g ", 135 | "i", 136 | "3 n", 137 | "p q-4", 138 | "s", 139 | "u 5", 140 | "x 6-7", 141 | "", 142 | "8 9", 143 | "d", 144 | "g", 145 | "i 1-2-3-4", 146 | "j 5-6-7-8", 147 | "l", 148 | "o r", 149 | "t u-9-1-2", 150 | "v/w", 151 | "x 3-4-5-6", 152 | "z 7", 153 | "b 8", 154 | "g", 155 | "j 9-1", 156 | "l 2-3", 157 | "", 158 | "x ", 159 | "y/k", 160 | "m", 161 | " o", 162 | "s", 163 | "w", 164 | "x z", 165 | "a c", 166 | "4-5-6-7", 167 | "d ", 168 | "e ", 169 | "f/g", 170 | "j", 171 | "m", 172 | "", 173 | "o", 174 | "q", 175 | "s", 176 | "t u y $ z", 177 | "", 178 | "payments and other credits", 179 | "8/9 f g-h -1", 180 | "", 181 | "PURCHASE", 182 | "2/3 l m/o 4,444.44", 183 | "q 5-6-7", 184 | "", 185 | "8/9 t u/w 1", 186 | "y 2-3-4", 187 | "", 188 | "5 totals year-to-date", 189 | "g 6 $7", 190 | "k 8 $9", 191 | "l-m-w", 192 | "x-y-i", 193 | "m", 194 | "q", 195 | "", 196 | "t", 197 | "x (y) g", 198 | "h i", 199 | "k l", 200 | "m o", 201 | "p", 202 | "q (r) t", 203 | "", 204 | "PURCHASES", 205 | "v w(x)(y) - 1 - - 2 - ", 206 | "a", 207 | "c d(e)(f) - 3 - - 4 - ", 208 | "h", 209 | "j k(l)(m) - 5 - - 6 - ", 210 | "7 q", 211 | "(r) u", 212 | "(v) z (b c)", 213 | "(d) i (k l)", 214 | "e", 215 | "p", 216 | "", 217 | "r", 218 | "v", 219 | "z", 220 | "b", 221 | "c h $ i j", 222 | "m", 223 | "8/9 o p/q 1 2", 224 | "3/4 r s/t 5 6", 225 | "", 226 | "v x 7 z 8/9/1", 227 | "2 b 3 c 4 5 6/7/8 d 9 e 1 2 g 3 " 228 | ] 229 | } 230 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/chase-prime-visa-credit/basic-3.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/basic-3.json", 3 | "output": { 4 | "accountSuffix": "3", 5 | "endDate": "2003-01-05T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 3, 9 | "date": "2009-01-02T00:00:00.000Z", 10 | "description": "c 
d/f", 11 | "originalText": ["1/2 c d/f 3"] 12 | }, 13 | { 14 | "amount": 9, 15 | "date": "2009-07-08T00:00:00.000Z", 16 | "description": "i j/l", 17 | "originalText": ["7/8 i j/l 9"] 18 | }, 19 | { 20 | "amount": 6, 21 | "date": "2009-04-05T00:00:00.000Z", 22 | "description": "o p/r", 23 | "originalText": ["4/5 o p/r 6"] 24 | }, 25 | { 26 | "amount": 3, 27 | "date": "2009-01-02T00:00:00.000Z", 28 | "description": "w x/z", 29 | "originalText": ["1/2 w x/z 3"] 30 | }, 31 | { 32 | "amount": 9, 33 | "date": "2009-07-08T00:00:00.000Z", 34 | "description": "e f/h", 35 | "originalText": ["7/8 e f/h 9"] 36 | }, 37 | { 38 | "amount": 9, 39 | "date": "2009-04-05T00:00:00.000Z", 40 | "description": "o 6-7-8 p", 41 | "originalText": ["4/5 o 6-7-8 p 9"] 42 | }, 43 | { 44 | "amount": 3, 45 | "date": "2009-01-02T00:00:00.000Z", 46 | "description": "s t/v", 47 | "originalText": ["1/2 s t/v 3"] 48 | }, 49 | { 50 | "amount": 9, 51 | "date": "2009-07-08T00:00:00.000Z", 52 | "description": "a b/d", 53 | "originalText": ["7/8 a b/d 9"] 54 | }, 55 | { 56 | "amount": 6, 57 | "date": "2009-04-05T00:00:00.000Z", 58 | "description": "i j/l", 59 | "originalText": ["4/5 i j/l 6"] 60 | }, 61 | { 62 | "amount": 3, 63 | "date": "2009-01-02T00:00:00.000Z", 64 | "description": "o p/r", 65 | "originalText": ["1/2 o p/r 3"] 66 | }, 67 | { 68 | "amount": 9, 69 | "date": "2009-07-08T00:00:00.000Z", 70 | "description": "w x/z", 71 | "originalText": ["7/8 w x/z 9"] 72 | }, 73 | { 74 | "amount": 6, 75 | "date": "2009-04-05T00:00:00.000Z", 76 | "description": "e f/h", 77 | "originalText": ["4/5 e f/h 6"] 78 | }, 79 | { 80 | "amount": 3, 81 | "date": "2009-01-02T00:00:00.000Z", 82 | "description": "m n/p", 83 | "originalText": ["1/2 m n/p 3"] 84 | }, 85 | { 86 | "amount": 9, 87 | "date": "2009-07-08T00:00:00.000Z", 88 | "description": "u v/x", 89 | "originalText": ["7/8 u v/x 9"] 90 | }, 91 | { 92 | "amount": 6, 93 | "date": "2009-04-05T00:00:00.000Z", 94 | "description": "c d/f", 95 | "originalText": 
["4/5 c d/f 6"] 96 | }, 97 | { 98 | "amount": 3, 99 | "date": "2009-01-02T00:00:00.000Z", 100 | "description": "i j/l", 101 | "originalText": ["1/2 i j/l 3"] 102 | }, 103 | { 104 | "amount": 9, 105 | "date": "2009-07-08T00:00:00.000Z", 106 | "description": "q r/t", 107 | "originalText": ["7/8 q r/t 9"] 108 | } 109 | ], 110 | "incomes": [ 111 | { 112 | "amount": -3, 113 | "date": "2009-01-02T00:00:00.000Z", 114 | "description": "q r-s", 115 | "originalText": ["1/2 q r-s -3"] 116 | }, 117 | { 118 | "amount": -6, 119 | "date": "2009-04-05T00:00:00.000Z", 120 | "description": "u v-w", 121 | "originalText": ["4/5 u v-w -6"] 122 | }, 123 | { 124 | "amount": -9, 125 | "date": "2009-07-08T00:00:00.000Z", 126 | "description": "y z-a", 127 | "originalText": ["7/8 y z-a -9"] 128 | } 129 | ], 130 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/basic-3.json", 131 | "startDate": "2009-07-05T00:00:00.000Z", 132 | "yearPrefix": 20 133 | }, 134 | "packageVersion": "2.0.0", 135 | "parserType": "chase-prime-visa-credit", 136 | "text": [ 137 | "c", 138 | "", 139 | " e", 140 | "i", 141 | "m", 142 | "n p", 143 | "q s", 144 | "1-2-3-4", 145 | "t ", 146 | "u ", 147 | "v/w", 148 | "z", 149 | "c", 150 | "d-e", 151 | "", 152 | "g", 153 | "h 5", 154 | "j", 155 | "$6", 156 | "q", 157 | "t 7", 158 | "z 8", 159 | "9 1 2 3 4 5 6", 160 | "c", 161 | "k 7", 162 | "8 9 1 2 3 4 5", 163 | "q 6", 164 | "$7", 165 | "v 8", 166 | "9 1 2 3 4 5 6", 167 | "a 7", 168 | "d", 169 | "8 9 1 2 3 4 5", 170 | "k 6", 171 | "7/8/9", 172 | "s 1", 173 | "2 3 4 5 6 7 8", 174 | "- x 9", 175 | "1 2 3 4 5 6 7", 176 | "b", 177 | "e l", 178 | "m 8", 179 | "b", 180 | "m", 181 | "o $9", 182 | "x $1 a", 183 | "d j", 184 | "o 2 u", 185 | "h", 186 | "s", 187 | "z b", 188 | "i j/k", 189 | "o y", 190 | "g", 191 | "t", 192 | "a c", 193 | "l", 194 | "p", 195 | "z", 196 | "i", 197 | "l 3 m $4", 198 | "x", 199 | "y", 200 | "h", 201 | "s", 202 | "c", 203 | "n", 204 | "5-6-7-8,888.88", 205 | "w", 206 | "i", 207 | 
"q", 208 | "s", 209 | "u", 210 | "v", 211 | "account number: 9 1 2 3", 212 | "i", 213 | "k $4,444.44", 214 | "u", 215 | "w -$5", 216 | "g", 217 | "o", 218 | "p q$6", 219 | "7-8-9-1 ", 220 | "s $2", 221 | "u $3", 222 | "w $4,444.44", 223 | "y $5", 224 | "a $6", 225 | "opening/closing date 7/8/9 - 1/2/3", 226 | "g $4", 227 | "i $5", 228 | "l $6", 229 | "o $7", 230 | "r $8,888.88", 231 | "x $9", 232 | "", 233 | "1 z 2 a 3 4 5/6/7 b 8 c 9 1 e 2 ", 234 | "3 ", 235 | "", 236 | "4", 237 | "", 238 | "g 5", 239 | "j 6/7/8", 240 | "o", 241 | " q 9-1", 242 | "s $2", 243 | "u v/w", 244 | "a", 245 | "c $3", 246 | "account number: 4 5 6 7", 247 | "", 248 | "$", 249 | "g", 250 | "h/q", 251 | "8 r 9 s ", 252 | "u", 253 | "1 z", 254 | "b c-2", 255 | "e", 256 | "g 3", 257 | "j 4-5", 258 | "", 259 | "6 7", 260 | "p", 261 | "s", 262 | "u 8-9-1-2", 263 | "v 3-4-5-6", 264 | "x", 265 | "a d", 266 | "e 7-8-9-1", 267 | "f/g", 268 | "i j-2-3-4", 269 | "l 5", 270 | "n 6", 271 | "r", 272 | "u 7-8", 273 | "w 9-1", 274 | "2-3-4-5", 275 | "", 276 | "i ", 277 | "j/v", 278 | "x", 279 | " z", 280 | "d", 281 | "h", 282 | "i k", 283 | "l n", 284 | "6-7-8-9", 285 | "o ", 286 | "p ", 287 | "q/r", 288 | "u", 289 | "x", 290 | "", 291 | "z", 292 | "b", 293 | "d", 294 | "e f j $ k", 295 | "", 296 | "payments and other credits", 297 | "1/2 q r-s -3", 298 | "4/5 u v-w -6", 299 | "7/8 y z-a -9", 300 | "", 301 | "PURCHASE", 302 | "1/2 c d/f 3", 303 | "h 4-5-6", 304 | "", 305 | "7/8 i j/l 9", 306 | "n 1-2-3", 307 | "", 308 | "4/5 o p/r 6", 309 | "t 7-8-9", 310 | "", 311 | "1/2 w x/z 3", 312 | "b 4-5-6", 313 | "", 314 | "7/8 e f/h 9", 315 | "j 1-2-3", 316 | "", 317 | "4/5 o 6-7-8 p 9", 318 | "1/2 s t/v 3", 319 | "x 4-5-6", 320 | "", 321 | "7/8 a b/d 9", 322 | "f 1-2-3", 323 | "", 324 | "4/5 i j/l 6", 325 | "n 7-8-9", 326 | "", 327 | "1/2 o p/r 3", 328 | "t 4-5-6", 329 | "", 330 | "7/8 w x/z 9", 331 | "b 1-2-3", 332 | "", 333 | "4/5 e f/h 6", 334 | "j 7-8-9", 335 | "", 336 | "1/2 m n/p 3", 337 | "r 4-5-6", 338 | 
"", 339 | "7/8 u v/x 9", 340 | "z 1-2-3", 341 | "", 342 | "4/5 c d/f 6", 343 | "h 7-8-9", 344 | "", 345 | "1/2 i j/l 3", 346 | "n 4-5-6", 347 | "", 348 | "7/8 q r/t 9", 349 | "v 1-2-3", 350 | "", 351 | "4 totals year-to-date", 352 | "d 5 $6", 353 | "h 7 $8,888.88", 354 | "i-j-t", 355 | "u-v-f", 356 | "j", 357 | "n", 358 | "", 359 | "p", 360 | "r", 361 | "v (w) e", 362 | "f g", 363 | "i j", 364 | "k m", 365 | "n", 366 | "o (p) r", 367 | "", 368 | "PURCHASES", 369 | "t u(v)(w) - 9 - - 1 - ", 370 | "y", 371 | "a b(c)(d) - 2 - - 3 - ", 372 | "f", 373 | "h i(j)(k) - 4 - - 5 - ", 374 | "6 o", 375 | "(p) s", 376 | "(t) x (z a)", 377 | "(b) g (i j)", 378 | "e", 379 | "o", 380 | "", 381 | "q s 7 u 8/9/1", 382 | "2 w 3 x 4 5 6/7/8 y 9 z 1 2 b 3 ", 383 | "d", 384 | "h", 385 | "l", 386 | "n", 387 | "o t $ u v", 388 | "y", 389 | "4/5 a b/c 6 7", 390 | "8/9 d e/f 1 2", 391 | "", 392 | "g 3 i 4 j 5 6 7/8/9 k 1 l 2 3 n 4 " 393 | ] 394 | } 395 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/chase-prime-visa-credit/basic.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/basic.json", 3 | "output": { 4 | "accountSuffix": "3", 5 | "endDate": "2003-01-05T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 9, 9 | "date": "2009-07-08T00:00:00.000Z", 10 | "description": "t u/w", 11 | "originalText": ["7/8 t u/w 9"] 12 | }, 13 | { 14 | "amount": 6, 15 | "date": "2009-04-05T00:00:00.000Z", 16 | "description": "z a/c", 17 | "originalText": ["4/5 z a/c 6"] 18 | }, 19 | { 20 | "amount": 3, 21 | "date": "2009-01-02T00:00:00.000Z", 22 | "description": "f g/i", 23 | "originalText": ["1/2 f g/i 3"] 24 | }, 25 | { 26 | "amount": 9, 27 | "date": "2009-07-08T00:00:00.000Z", 28 | "description": "n o/q", 29 | "originalText": ["7/8 n o/q 9"] 30 | }, 31 | { 32 | "amount": 6, 33 | "date": "2009-04-05T00:00:00.000Z", 34 | 
"description": "v w/y", 35 | "originalText": ["4/5 v w/y 6"] 36 | }, 37 | { 38 | "amount": 6, 39 | "date": "2009-01-02T00:00:00.000Z", 40 | "description": "f 3-4-5 g", 41 | "originalText": ["1/2 f 3-4-5 g 6"] 42 | }, 43 | { 44 | "amount": 9, 45 | "date": "2009-07-08T00:00:00.000Z", 46 | "description": "j k/m", 47 | "originalText": ["7/8 j k/m 9"] 48 | }, 49 | { 50 | "amount": 6, 51 | "date": "2009-04-05T00:00:00.000Z", 52 | "description": "r s/u", 53 | "originalText": ["4/5 r s/u 6"] 54 | }, 55 | { 56 | "amount": 3, 57 | "date": "2009-01-02T00:00:00.000Z", 58 | "description": "z a/c", 59 | "originalText": ["1/2 z a/c 3"] 60 | }, 61 | { 62 | "amount": 9, 63 | "date": "2009-07-08T00:00:00.000Z", 64 | "description": "f g/i", 65 | "originalText": ["7/8 f g/i 9"] 66 | }, 67 | { 68 | "amount": 6, 69 | "date": "2009-04-05T00:00:00.000Z", 70 | "description": "n o/q", 71 | "originalText": ["4/5 n o/q 6"] 72 | }, 73 | { 74 | "amount": 3, 75 | "date": "2009-01-02T00:00:00.000Z", 76 | "description": "v w/y", 77 | "originalText": ["1/2 v w/y 3"] 78 | }, 79 | { 80 | "amount": 9, 81 | "date": "2009-07-08T00:00:00.000Z", 82 | "description": "d e/g", 83 | "originalText": ["7/8 d e/g 9"] 84 | }, 85 | { 86 | "amount": 6, 87 | "date": "2009-04-05T00:00:00.000Z", 88 | "description": "l m/o", 89 | "originalText": ["4/5 l m/o 6"] 90 | }, 91 | { 92 | "amount": 3, 93 | "date": "2009-01-02T00:00:00.000Z", 94 | "description": "t u/w", 95 | "originalText": ["1/2 t u/w 3"] 96 | }, 97 | { 98 | "amount": 9, 99 | "date": "2009-07-08T00:00:00.000Z", 100 | "description": "z a/c", 101 | "originalText": ["7/8 z a/c 9"] 102 | }, 103 | { 104 | "amount": 6, 105 | "date": "2009-04-05T00:00:00.000Z", 106 | "description": "h i/k", 107 | "originalText": ["4/5 h i/k 6"] 108 | } 109 | ], 110 | "incomes": [ 111 | { 112 | "amount": -9, 113 | "date": "2009-07-08T00:00:00.000Z", 114 | "description": "h i-j", 115 | "originalText": ["7/8 h i-j -9"] 116 | }, 117 | { 118 | "amount": -3, 119 | "date": 
"2009-01-02T00:00:00.000Z", 120 | "description": "l m-n", 121 | "originalText": ["1/2 l m-n -3"] 122 | }, 123 | { 124 | "amount": -6, 125 | "date": "2009-04-05T00:00:00.000Z", 126 | "description": "p q-r", 127 | "originalText": ["4/5 p q-r -6"] 128 | } 129 | ], 130 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/basic.json", 131 | "startDate": "2009-07-05T00:00:00.000Z", 132 | "yearPrefix": 20 133 | }, 134 | "packageVersion": "2.0.0", 135 | "parserType": "chase-prime-visa-credit", 136 | "text": [ 137 | "c", 138 | "", 139 | "e", 140 | "g", 141 | "1-2-3-4", 142 | "h ", 143 | "i ", 144 | "j/k", 145 | "n", 146 | "q", 147 | "r-s", 148 | "", 149 | "u", 150 | "v 5", 151 | "x", 152 | "$6", 153 | "e", 154 | "h 7", 155 | "n 8", 156 | "9 1 2 3 4 5 6", 157 | "q", 158 | "y 7", 159 | "8 9 1 2 3 4 5", 160 | "e 6", 161 | "$7", 162 | "j 8", 163 | "9 1 2 3 4 5 6", 164 | "o 7", 165 | "r", 166 | "8 9 1 2 3 4 5", 167 | "y 6", 168 | "7/8/9", 169 | "g 1", 170 | "2 3 4 5 6 7 8", 171 | "- l 9", 172 | "1 2 3 4 5 6 7", 173 | "p", 174 | "s z", 175 | "a 8", 176 | "p", 177 | "a", 178 | "c $9", 179 | "l $1 o", 180 | "r x", 181 | "c 2 i", 182 | "v", 183 | "g", 184 | "n p", 185 | "w x/y", 186 | "c m", 187 | "u", 188 | "h", 189 | "o q", 190 | "z", 191 | "d", 192 | "n", 193 | "w", 194 | "z 3 a $4", 195 | "l", 196 | "m", 197 | "v", 198 | "g", 199 | "q", 200 | "b", 201 | "5-6-7-8,888.88", 202 | "k", 203 | "w", 204 | "e", 205 | "g", 206 | "i", 207 | "j", 208 | "account number: 9 1 2 3", 209 | "w", 210 | "y $4,444.44", 211 | "i", 212 | "k -$5", 213 | "u", 214 | "c", 215 | "d e$6", 216 | "7-8-9-1 ", 217 | "g $2", 218 | "i $3", 219 | "k $4,444.44", 220 | "m $5", 221 | "o $6", 222 | "opening/closing date 7/8/9 - 1/2/3", 223 | "u $4", 224 | "w $5", 225 | "z $6", 226 | "c $7", 227 | "f $8,888.88", 228 | "l $9", 229 | "", 230 | "1 n 2 o 3 4 5/6/7 p 8 q 9 1 s 2 ", 231 | "3 ", 232 | "", 233 | "u 4", 234 | "x 5/6/7", 235 | "c", 236 | " e 8-9", 237 | "g $1", 238 | "i j/k", 239 | "o", 240 
| "q $2", 241 | "account number: 3 4 5 6", 242 | "", 243 | "$", 244 | "u", 245 | "v/e", 246 | "7 f 8 g ", 247 | "i", 248 | "9 n", 249 | "p q-1", 250 | "s", 251 | "u 2", 252 | "x 3-4", 253 | "d", 254 | "f 5-6-7-8", 255 | "g 9-1-2-3", 256 | "h 4-5-6-7", 257 | "i/j", 258 | "l m-8-9-1", 259 | "o 2", 260 | "q 3", 261 | "u", 262 | "x 4-5", 263 | "z 6-7", 264 | "8-9-1-2", 265 | "", 266 | "l ", 267 | "m/y", 268 | "a", 269 | "c", 270 | "e", 271 | "3-4-5-6", 272 | "f ", 273 | "g ", 274 | "h/i", 275 | "l", 276 | "o", 277 | "", 278 | "q", 279 | "s", 280 | "u", 281 | "v w a $ b", 282 | "payments and other credits", 283 | "7/8 h i-j -9", 284 | "1/2 l m-n -3", 285 | "4/5 p q-r -6", 286 | "", 287 | "PURCHASE", 288 | "7/8 t u/w 9", 289 | "y 1-2-3", 290 | "4/5 z a/c 6", 291 | "e 7-8-9", 292 | "1/2 f g/i 3", 293 | "k 4-5-6", 294 | "7/8 n o/q 9", 295 | "s 1-2-3", 296 | "4/5 v w/y 6", 297 | "a 7-8-9", 298 | "1/2 f 3-4-5 g 6", 299 | "7/8 j k/m 9", 300 | "o 1-2-3", 301 | "4/5 r s/u 6", 302 | "w 7-8-9", 303 | "1/2 z a/c 3", 304 | "e 4-5-6", 305 | "7/8 f g/i 9", 306 | "k 1-2-3", 307 | "4/5 n o/q 6", 308 | "s 7-8-9", 309 | "1/2 v w/y 3", 310 | "a 4-5-6", 311 | "7/8 d e/g 9", 312 | "i 1-2-3", 313 | "4/5 l m/o 6", 314 | "q 7-8-9", 315 | "1/2 t u/w 3", 316 | "y 4-5-6", 317 | "7/8 z a/c 9", 318 | "e 1-2-3", 319 | "4/5 h i/k 6", 320 | "m 7-8-9", 321 | "", 322 | "1 totals year-to-date", 323 | "u 2 $3", 324 | "y 4 $5", 325 | "z-a-k", 326 | "l-m-w", 327 | "a", 328 | "e", 329 | "", 330 | "g", 331 | "i", 332 | "m (n) v", 333 | "w x", 334 | "z a", 335 | "b d", 336 | "e", 337 | "f (g) i", 338 | "", 339 | "PURCHASES", 340 | "k l(m)(n) - 6 - - 7 - ", 341 | "p", 342 | "r s(t)(u) - 8 - - 9 - ", 343 | "w", 344 | "y z(a)(b) - 1 - - 2 - ", 345 | "3 f", 346 | "(g) j", 347 | "(k) o (q r)", 348 | "(s) x (z a)", 349 | "v", 350 | "f", 351 | "", 352 | "h j 4 l 5/6/7", 353 | "8 n 9 o 1 2 3/4/5 p 6 q 7 8 s 9 ", 354 | "u", 355 | "y", 356 | "c", 357 | "e", 358 | "f k $ l m", 359 | "p", 360 | "1/2 r s/t 3 4", 361 | "5/6 
u v/w 7 8", 362 | "", 363 | "z 9 a 1 2 3/4/5 b 6 c 7 8 e 9 " 364 | ] 365 | } 366 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/chase-prime-visa-credit/newer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/newer.json", 3 | "output": { 4 | "accountSuffix": "2", 5 | "endDate": "2002-09-04T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 2, 9 | "date": "2008-09-01T00:00:00.000Z", 10 | "description": "r s/u", 11 | "originalText": ["9/1 r s/u 2"] 12 | }, 13 | { 14 | "amount": 8888.88, 15 | "date": "2008-06-07T00:00:00.000Z", 16 | "description": "x y/a", 17 | "originalText": ["6/7 x y/a 8,888.88"] 18 | }, 19 | { 20 | "amount": 5, 21 | "date": "2008-03-04T00:00:00.000Z", 22 | "description": "d e/g", 23 | "originalText": ["3/4 d e/g 5"] 24 | }, 25 | { 26 | "amount": 2, 27 | "date": "2008-09-01T00:00:00.000Z", 28 | "description": "l m/o", 29 | "originalText": ["9/1 l m/o 2"] 30 | }, 31 | { 32 | "amount": 8888.88, 33 | "date": "2008-06-07T00:00:00.000Z", 34 | "description": "t u/w", 35 | "originalText": ["6/7 t u/w 8,888.88"] 36 | }, 37 | { 38 | "amount": 5, 39 | "date": "2008-03-04T00:00:00.000Z", 40 | "description": "b c/e", 41 | "originalText": ["3/4 b c/e 5"] 42 | }, 43 | { 44 | "amount": 2, 45 | "date": "2008-09-01T00:00:00.000Z", 46 | "description": "j k/m", 47 | "originalText": ["9/1 j k/m 2"] 48 | }, 49 | { 50 | "amount": 8888.88, 51 | "date": "2008-06-07T00:00:00.000Z", 52 | "description": "p q/s", 53 | "originalText": ["6/7 p q/s 8,888.88"] 54 | }, 55 | { 56 | "amount": 5, 57 | "date": "2008-03-04T00:00:00.000Z", 58 | "description": "x y/a", 59 | "originalText": ["3/4 x y/a 5"] 60 | } 61 | ], 62 | "incomes": [ 63 | { 64 | "amount": -5, 65 | "date": "2008-03-04T00:00:00.000Z", 66 | "description": "h i-j", 67 | "originalText": ["3/4 h i-j -5"] 68 | }, 69 | { 70 | "amount": 
-8888.88, 71 | "date": "2008-06-07T00:00:00.000Z", 72 | "description": "l m-n", 73 | "originalText": ["6/7 l m-n -8,888.88"] 74 | } 75 | ], 76 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/newer.json", 77 | "startDate": "2008-06-04T00:00:00.000Z", 78 | "yearPrefix": 20 79 | }, 80 | "packageVersion": "2.0.0", 81 | "parserType": "chase-prime-visa-credit", 82 | "text": [ 83 | "c", 84 | "", 85 | " e", 86 | "i", 87 | "m", 88 | "n p", 89 | "q s", 90 | "1-2-3-4", 91 | "t ", 92 | "u ", 93 | "v/w", 94 | "z", 95 | "c", 96 | "d-e", 97 | "", 98 | "g", 99 | "h 5", 100 | "j", 101 | "$6", 102 | "q", 103 | "t 7", 104 | "w", 105 | "c 8", 106 | "9 1 2 3 4 5 6", 107 | "k 7", 108 | "$8,888.88", 109 | "9 1 2 3 4 5 6", 110 | "q 7", 111 | "t", 112 | "y 8", 113 | "9 1 2 3 4 5 6", 114 | "7/8/9", 115 | "d 1", 116 | "2 3 4 5 6 7 8", 117 | "k 9", 118 | "- p 1", 119 | "2 3 4 5 6 7 8", 120 | "9 1 2 3 4 5 6 t", 121 | "u 7", 122 | "x e", 123 | "p", 124 | "e", 125 | "n $8 q", 126 | "s $9", 127 | "x 1 d", 128 | "g m", 129 | "x", 130 | "k", 131 | "r s/t", 132 | "a c", 133 | "", 134 | "g q", 135 | "d", 136 | "l", 137 | "u", 138 | "b d", 139 | "n", 140 | "r", 141 | "a", 142 | "l", 143 | "o 2 p $3", 144 | "y", 145 | "z", 146 | "k", 147 | "v", 148 | "f", 149 | "o", 150 | "4-5-6-7", 151 | "a", 152 | "i", 153 | "j", 154 | "l", 155 | "n", 156 | "y", 157 | "account number: 8 9 1 2", 158 | "k", 159 | "m $3", 160 | "w", 161 | "y -$4,444.44", 162 | "g", 163 | "5-6-7-8,888.88 ", 164 | "h i$9", 165 | "k $1", 166 | "m $2", 167 | "o $3", 168 | "q $4,444.44", 169 | "s $5", 170 | "opening/closing date 6/7/8 - 9/1/2", 171 | "y $3", 172 | "a $4", 173 | "d $5", 174 | "g $6", 175 | "j $7", 176 | "p $8,888.88", 177 | "", 178 | "9 r 1 s 2 3 4/5/6 t 7 u 8 9 w 1 ", 179 | "2 ", 180 | "", 181 | "3", 182 | "", 183 | "y 4", 184 | "b 5/6/7", 185 | "f", 186 | " h 8-9", 187 | "j $1", 188 | "k/l", 189 | "p", 190 | "r $2", 191 | "account number: 3 4 5 6", 192 | "", 193 | "$", 194 | "v", 195 | "w/f", 196 | 
"7 g 8 h ", 197 | "j", 198 | "9 o", 199 | "q r-1", 200 | "t", 201 | "v 2", 202 | "y 3-4", 203 | "", 204 | "z", 205 | "f", 206 | "i", 207 | "k 5-6-7-8", 208 | "l 9-1-2-3", 209 | "n", 210 | "q t", 211 | "v w-4-5-6", 212 | "x/y", 213 | "z 7-8-9-1", 214 | "b 2", 215 | "d 3", 216 | "i", 217 | "l 4-5", 218 | "n 6-7", 219 | "", 220 | "a ", 221 | "b/n", 222 | "o", 223 | " q", 224 | "u", 225 | "y", 226 | "z b", 227 | "c e", 228 | "8-9-1-2", 229 | "f ", 230 | "g ", 231 | "h/i", 232 | "l", 233 | "o", 234 | "", 235 | "q", 236 | "s", 237 | "u", 238 | "v w a $ b", 239 | "", 240 | "payments and other credits", 241 | "3/4 h i-j -5", 242 | "6/7 l m-n -8,888.88", 243 | "", 244 | "PURCHASE", 245 | "9/1 r s/u 2", 246 | "w 3-4-5", 247 | "", 248 | "6/7 x y/a 8,888.88", 249 | "c 9-1-2", 250 | "", 251 | "3/4 d e/g 5", 252 | "i 6-7-8", 253 | "", 254 | "9/1 l m/o 2", 255 | "q 3-4-5", 256 | "", 257 | "6/7 t u/w 8,888.88", 258 | "y 9-1-2", 259 | "", 260 | "3/4 b c/e 5", 261 | "g 6-7-8", 262 | "", 263 | "9/1 j k/m 2", 264 | "o 3-4-5", 265 | "", 266 | "6/7 p q/s 8,888.88", 267 | "u 9-1-2", 268 | "", 269 | "3/4 x y/a 5", 270 | "c 6-7-8", 271 | "", 272 | "9 totals year-to-date", 273 | "k 1 $2", 274 | "o 3 $4,444.44", 275 | "p-q-a", 276 | "b-c-m", 277 | "q", 278 | "u", 279 | "", 280 | "x", 281 | "b (c) k", 282 | "l m", 283 | "o p", 284 | "q s", 285 | "t", 286 | "u (v) x", 287 | "", 288 | "PURCHASES", 289 | "z a(b)(c) - 5 - - 6 - ", 290 | "e", 291 | "g h(i)(j) - 7 - - 8 - ", 292 | "l", 293 | "n o(p)(q) - 9 - - 1 - ", 294 | "2 u", 295 | "(v) y", 296 | "(z) d (f g)", 297 | "(h) m (o p)", 298 | "i", 299 | "t", 300 | "", 301 | "v", 302 | "z", 303 | "d", 304 | "f", 305 | "g l $ m n", 306 | "q", 307 | "3/4 s t/u 5 6", 308 | "7/8 w x/y 9 1", 309 | "", 310 | "a c 2 e 3/4/5", 311 | "6 g 7 h 8 9 1/2/3 i 4 j 5 6 l 7 " 312 | ] 313 | } 314 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/chase-prime-visa-credit/no-payments.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/no-payments.json", 3 | "output": { 4 | "accountSuffix": "5", 5 | "endDate": "2001-08-12T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 7, 9 | "date": "2007-05-06T00:00:00.000Z", 10 | "description": "v w/y", 11 | "originalText": ["5/6 v w/y 7"] 12 | } 13 | ], 14 | "incomes": [], 15 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/no-payments.json", 16 | "startDate": "2007-05-03T00:00:00.000Z", 17 | "yearPrefix": 20 18 | }, 19 | "packageVersion": "2.0.0", 20 | "parserType": "chase-prime-visa-credit", 21 | "text": [ 22 | "c", 23 | "", 24 | "e", 25 | "g", 26 | "1-2-3-4", 27 | "h ", 28 | "i ", 29 | "j/k", 30 | "n", 31 | "q", 32 | "r-s", 33 | "", 34 | "u", 35 | "v 5", 36 | "x", 37 | "$6", 38 | "e", 39 | "h 7", 40 | "k", 41 | "q 8", 42 | "9 1 2 3 4 5 6", 43 | "y 7", 44 | "$8,888.88", 45 | "9 1 2 3 4 5 6", 46 | "e 7", 47 | "h", 48 | "m 8", 49 | "9 1 2 3 4 5 6", 50 | "7/8/9", 51 | "r 1", 52 | "2 3 4 5 6 7 8", 53 | "y 9", 54 | "- d 1", 55 | "2 3 4 5 6 7 8", 56 | "9 1 2 3 4 5 6 h", 57 | "i 7", 58 | "l s", 59 | "d", 60 | "s", 61 | "b $8 e", 62 | "g $9", 63 | "l 1 r", 64 | "c", 65 | "j k/l", 66 | "n", 67 | "p", 68 | "c", 69 | "account number: 2 3 4 5", 70 | "n", 71 | "p $6", 72 | "z", 73 | "b $7", 74 | "k", 75 | "v", 76 | "w x$8,888.88", 77 | "g", 78 | "i $9", 79 | "t", 80 | "v $1", 81 | "g", 82 | "p", 83 | "r $2", 84 | "d", 85 | "f $3", 86 | "n", 87 | "p $4,444.44 q", 88 | "opening/closing date 5/6/7 - 8/9/1", 89 | "e", 90 | "h $2", 91 | "r", 92 | "b", 93 | "d $3", 94 | "l", 95 | "o $4", 96 | "5-6-7-8,888.88 ", 97 | "r $9", 98 | "u $1", 99 | "a $2", 100 | "", 101 | "3 c 4 d 5 6 7/8/9 e 1 f 2 3 h 4 ", 102 | "5 ", 103 | "", 104 | "j 6", 105 | "m 7/8/9", 106 | "r", 107 | " t 1-2", 108 | "v $3", 109 | "x y/z", 110 | "d", 111 | "f $4,444.44", 112 | "account number: 5 6 7 8", 113 | "", 114 | 
"$", 115 | "j", 116 | "k/t", 117 | "9 u 1 v ", 118 | "x", 119 | "2 c", 120 | "e f-3", 121 | "h", 122 | "j 4", 123 | "m 5-6", 124 | "s", 125 | "u 7-8-9-1", 126 | "v 2-3-4-5", 127 | "x y-6-7-8", 128 | "z/a", 129 | "b 9-1-2-3", 130 | "d 4", 131 | "f 5", 132 | "k", 133 | "n 6-7", 134 | "p 8-9", 135 | "", 136 | "b ", 137 | "c/o", 138 | "q", 139 | "s", 140 | "u", 141 | "1-2-3-4", 142 | "v ", 143 | "w ", 144 | "x/y", 145 | "b", 146 | "e", 147 | "", 148 | "g", 149 | "i", 150 | "k", 151 | "l m q $ r", 152 | "PURCHASE", 153 | "5/6 v w/y 7", 154 | "a 8-9-1", 155 | "", 156 | "2 totals year-to-date", 157 | "i 3 $4,444.44", 158 | "m 5 $6", 159 | "n-o-y", 160 | "z-a-k", 161 | "o", 162 | "s", 163 | "", 164 | "u", 165 | "w", 166 | "a (b) j", 167 | "k l", 168 | "n o", 169 | "p r", 170 | "s", 171 | "t (u) w", 172 | "", 173 | "PURCHASES", 174 | "y z(a)(b) - 7 - - 8 - ", 175 | "d", 176 | "f g(h)(i) - 9 - - 1 - ", 177 | "k", 178 | "m n(o)(p) - 2 - - 3 - ", 179 | "4 t", 180 | "(u) x", 181 | "(y) c (e f)", 182 | "(g) l (n o)", 183 | "j", 184 | "t", 185 | "", 186 | "v", 187 | "z", 188 | "d", 189 | "f", 190 | "g l $ m n", 191 | "q", 192 | "5/6 s t/u 7 8", 193 | "", 194 | "w y 9 a 1/2/3", 195 | "4 c 5 d 6 7 8/9/1 e 2 f 3 4 h 5 " 196 | ] 197 | } 198 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/chase-prime-visa-credit/short.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/short.json", 3 | "output": { 4 | "accountSuffix": "5", 5 | "endDate": "2001-08-12T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 7, 9 | "date": "2007-05-06T00:00:00.000Z", 10 | "description": "m n/p", 11 | "originalText": ["5/6 m n/p 7"] 12 | }, 13 | { 14 | "amount": 4444.44, 15 | "date": "2007-02-03T00:00:00.000Z", 16 | "description": "s t/v", 17 | "originalText": ["2/3 s t/v 4,444.44"] 18 | }, 19 | { 20 | "amount": 1, 21 | "date": 
"2007-08-09T00:00:00.000Z", 22 | "description": "a b/d", 23 | "originalText": ["8/9 a b/d 1"] 24 | } 25 | ], 26 | "incomes": [ 27 | { 28 | "amount": -1, 29 | "date": "2007-08-09T00:00:00.000Z", 30 | "description": "c d-e", 31 | "originalText": ["8/9 c d-e -1"] 32 | }, 33 | { 34 | "amount": -4444.44, 35 | "date": "2007-02-03T00:00:00.000Z", 36 | "description": "g h-i", 37 | "originalText": ["2/3 g h-i -4,444.44"] 38 | } 39 | ], 40 | "name": "Sanitized files/sample-files/sanitized/chase-prime-visa-credit/short.json", 41 | "startDate": "2007-05-03T00:00:00.000Z", 42 | "yearPrefix": 20 43 | }, 44 | "packageVersion": "2.0.0", 45 | "parserType": "chase-prime-visa-credit", 46 | "text": [ 47 | "c", 48 | "", 49 | " e", 50 | "i", 51 | "m", 52 | "n p", 53 | "q s", 54 | "1-2-3-4", 55 | "t ", 56 | "u ", 57 | "v/w", 58 | "z", 59 | "c", 60 | "d-e", 61 | "", 62 | "g", 63 | "h 5", 64 | "j", 65 | "$6", 66 | "q", 67 | "t 7", 68 | "w", 69 | "c 8", 70 | "9 1 2 3 4 5 6", 71 | "k 7", 72 | "$8,888.88", 73 | "9 1 2 3 4 5 6", 74 | "q 7", 75 | "t", 76 | "y 8", 77 | "9 1 2 3 4 5 6", 78 | "7/8/9", 79 | "d 1", 80 | "2 3 4 5 6 7 8", 81 | "k 9", 82 | "- p 1", 83 | "2 3 4 5 6 7 8", 84 | "9 1 2 3 4 5 6 t", 85 | "u 7", 86 | "x e", 87 | "p", 88 | "e", 89 | "n $8 q", 90 | "s $9", 91 | "x 1 d", 92 | "o", 93 | "v w/x", 94 | "a", 95 | "n", 96 | "account number: 2 3 4 5", 97 | "y", 98 | "a $6", 99 | "k", 100 | "m -$7", 101 | "v", 102 | "g", 103 | "h i$8,888.88", 104 | "r", 105 | "t $9", 106 | "e", 107 | "g $1", 108 | "r", 109 | "a", 110 | "c $2", 111 | "o", 112 | "q $3", 113 | "y", 114 | "a $4,444.44 b", 115 | "opening/closing date 5/6/7 - 8/9/1", 116 | "p", 117 | "s $2", 118 | "c", 119 | "m", 120 | "o $3", 121 | "w", 122 | "z $4", 123 | "5-6-7-8,888.88 ", 124 | "c $9", 125 | "f $1", 126 | "l $2", 127 | "", 128 | "3 n 4 o 5 6 7/8/9 p 1 q 2 3 s 4 ", 129 | "5 ", 130 | "", 131 | "6", 132 | "", 133 | "u 7", 134 | "x 8/9/1", 135 | "b", 136 | " d 2-3", 137 | "f $4,444.44", 138 | "g/h", 139 | "l", 140 | "n $5", 
141 | "account number: 6 7 8 9", 142 | "", 143 | "$", 144 | "r", 145 | "s/b", 146 | "1 c 2 d ", 147 | "f", 148 | "3 k", 149 | "m n-4", 150 | "p", 151 | "r 5", 152 | "u 6-7", 153 | "", 154 | "8 9", 155 | "a", 156 | "d", 157 | "f 1-2-3-4", 158 | "g 5-6-7-8", 159 | "i", 160 | "l o", 161 | "q r-9-1-2", 162 | "s/t", 163 | "u 3-4-5-6", 164 | "w 7", 165 | "y 8", 166 | "d", 167 | "g 9-1", 168 | "i 2-3", 169 | "", 170 | "v ", 171 | "w/i", 172 | "j", 173 | " l", 174 | "p", 175 | "t", 176 | "u w", 177 | "x z", 178 | "4-5-6-7", 179 | "a ", 180 | "b ", 181 | "c/d", 182 | "g", 183 | "j", 184 | "", 185 | "l", 186 | "n", 187 | "p", 188 | "q r v $ w", 189 | "", 190 | "payments and other credits", 191 | "8/9 c d-e -1", 192 | "2/3 g h-i -4,444.44", 193 | "", 194 | "PURCHASE", 195 | "5/6 m n/p 7", 196 | "r 8-9-1", 197 | "", 198 | "2/3 s t/v 4,444.44", 199 | "x 5-6-7", 200 | "", 201 | "8/9 a b/d 1", 202 | "f 2-3-4", 203 | "", 204 | "5 totals year-to-date", 205 | "n 6 $7", 206 | "r 8 $9", 207 | "s-t-d", 208 | "e-f-p", 209 | "t", 210 | "x", 211 | "", 212 | "z", 213 | "b", 214 | "f (g) o", 215 | "p q", 216 | "s t", 217 | "u w", 218 | "x", 219 | "y (z) b", 220 | "", 221 | "PURCHASES", 222 | "d e(f)(g) - 1 - - 2 - ", 223 | "i", 224 | "k l(m)(n) - 3 - - 4 - ", 225 | "p", 226 | "r s(t)(u) - 5 - - 6 - ", 227 | "7 y", 228 | "(z) c", 229 | "(d) h (j k)", 230 | "(l) q (s t)", 231 | "m", 232 | "x", 233 | "", 234 | "z", 235 | "d", 236 | "h", 237 | "j", 238 | "k p $ q r", 239 | "u", 240 | "8/9 w x/y 1 2", 241 | "", 242 | "a c 3 e 4/5/6", 243 | "7 g 8 h 9 1 2/3/4 i 5 j 6 7 l 8 " 244 | ] 245 | } 246 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/citi-costco-visa-credit/basic-2.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/citi-costco-visa-credit/basic-2.json", 3 | "output": { 4 | "accountSuffix": "2", 5 | "endDate": 
"2008-06-07T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 5, 9 | "date": "2005-03-04T00:00:00.000Z", 10 | "description": "h i j", 11 | "originalText": ["1/2 3/4 h i j $5"] 12 | }, 13 | { 14 | "amount": 1, 15 | "date": "2005-08-09T00:00:00.000Z", 16 | "description": "l m n", 17 | "originalText": ["6/7 8/9 l m n $1"] 18 | }, 19 | { 20 | "amount": 6, 21 | "date": "2005-04-05T00:00:00.000Z", 22 | "description": "p q r", 23 | "originalText": ["2/3 4/5 p q r $6"] 24 | } 25 | ], 26 | "incomes": [ 27 | { 28 | "amount": 6, 29 | "date": "2005-04-05T00:00:00.000Z", 30 | "description": "x", 31 | "originalText": ["4/5 x -$6"] 32 | }, 33 | { 34 | "amount": 9, 35 | "date": "2005-07-08T00:00:00.000Z", 36 | "description": "b", 37 | "originalText": ["7/8 b -$9"] 38 | } 39 | ], 40 | "name": "Sanitized files/sample-files/sanitized/citi-costco-visa-credit/basic-2.json", 41 | "startDate": "2005-03-04T00:00:00.000Z", 42 | "yearPrefix": 20 43 | }, 44 | "packageVersion": "2.0.0", 45 | "parserType": "citi-costco-visa-credit", 46 | "text": [ 47 | "b", 48 | "d 1 account number ending in: 2", 49 | "billing period: 3/4/5-6/7/8", 50 | "", 51 | "k l", 52 | "o $9 ", 53 | "s 1/2/t $3 ", 54 | "w 4/5/6", 55 | "f $7 j ", 56 | "", 57 | "x", 58 | "k $8 p ", 59 | "x ", 60 | "e 9-1-2-3 ", 61 | "", 62 | "k ", 63 | "n ", 64 | "o p ", 65 | "x", 66 | "z", 67 | "b ", 68 | "e", 69 | "g i (4)", 70 | "p ", 71 | "r ", 72 | "u ", 73 | "", 74 | "y 5", 75 | "", 76 | "a", 77 | "c", 78 | "e 6", 79 | "7 h", 80 | "j 8-9 ", 81 | "l", 82 | "n o 1-2", 83 | "p r 3-4-5-6", 84 | "s-t-w 7-8-9-1", 85 | "y", 86 | "", 87 | "a", 88 | "c", 89 | "f 2", 90 | "h $3 ", 91 | "", 92 | "Account Summary", 93 | "k l ", 94 | "m n", 95 | "p", 96 | "payments, credits and adjustments", 97 | "4/5 x -$6", 98 | "7/8 b -$9", 99 | "", 100 | "d ", 101 | "standard purchases", 102 | "1/2 3/4 h i j $5", 103 | "6/7 8/9 l m n $1", 104 | "2/3 4/5 p q r $6", 105 | "", 106 | "t", 107 | "y $7", 108 | "", 109 | "a", 110 | "f $8,888.88", 111 | "", 112 | 
"9 g h-i-j", 113 | " n 1 $2 ", 114 | " r 3 $4,444.44 ", 115 | "", 116 | "u y 5 ", 117 | "c (d) l", 118 | "", 119 | "p ", 120 | " r s (t) w y", 121 | " z", 122 | " b $6 (c) $7", 123 | "d (e)", 124 | " f", 125 | " h $8,888.88 (i) $9", 126 | "j (k)", 127 | "", 128 | "o (p) a ", 129 | "(b) g (h) o (p ", 130 | "q r).", 131 | "", 132 | "t", 133 | "g ", 134 | "l", 135 | "b ", 136 | "h 2/3/4 p ", 137 | "r", 138 | "u ", 139 | "g ", 140 | "h(i) r", 141 | "u v ", 142 | "y", 143 | "z c ", 144 | "j o ", 145 | "s ", 146 | "e ", 147 | "i ", 148 | "(k l) . s ", 149 | "u", 150 | "k ", 151 | "q ", 152 | "r g ", 153 | "j ", 154 | "w ", 155 | "z ", 156 | "k l ", 157 | "m $6 (o ", 158 | "t", 159 | "v ", 160 | "z a g ", 161 | "j k)", 162 | "m q $7", 163 | "u ", 164 | "f h 8 i ", 165 | "l ", 166 | "a ", 167 | "e", 168 | "f o (p q) t ", 169 | "x ", 170 | "l m ", 171 | "a ", 172 | "b g ", 173 | "v ", 174 | "z ", 175 | "m ", 176 | "o", 177 | "a f ", 178 | "i ", 179 | "v ", 180 | "z ", 181 | "e p ", 182 | "r u ", 183 | "g", 184 | "h", 185 | "j", 186 | "m ", 187 | "q t x ", 188 | "a ", 189 | "b f r ", 190 | "v ", 191 | "h ", 192 | "l m ", 193 | "t", 194 | "y ", 195 | "f", 196 | "j ", 197 | "m o ", 198 | "x", 199 | "b ", 200 | "f j p ", 201 | "s ", 202 | "w c d e f", 203 | "h", 204 | "m 9 u ", 205 | "y z ", 206 | "p q ", 207 | "t ", 208 | "i ", 209 | "m ", 210 | "r", 211 | "u ", 212 | "h ", 213 | "l m ", 214 | "o", 215 | "s", 216 | "g ", 217 | "i ", 218 | "l", 219 | "o Account Summary", 220 | "d ", 221 | "g ", 222 | "p v ", 223 | "y z ", 224 | "o ", 225 | "u ", 226 | "a", 227 | "d ", 228 | "s ", 229 | "v ", 230 | "a", 231 | "c", 232 | "m", 233 | "", 234 | "n-1", 235 | "", 236 | "p ", 237 | "t ", 238 | "x ", 239 | "a ", 240 | "e ", 241 | "h ", 242 | "l ", 243 | "p q-r ", 244 | "u ", 245 | "w ", 246 | "z ", 247 | "c ", 248 | "g ", 249 | "j ", 250 | "l ", 251 | "n ", 252 | "2 p ", 253 | "q 3 r 4 ", 254 | "t 5", 255 | "v ", 256 | "y ", 257 | "c ", 258 | "g ", 259 | "h l ", 260 | "o ", 261 | "s t 
", 262 | "x " 263 | ] 264 | } 265 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/citi-costco-visa-credit/basic.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/citi-costco-visa-credit/basic.json", 3 | "output": { 4 | "accountSuffix": "2", 5 | "endDate": "2008-06-07T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 7, 9 | "date": "2005-05-06T00:00:00.000Z", 10 | "description": "c d e", 11 | "originalText": ["3/4 5/6 c d e $7"] 12 | }, 13 | { 14 | "amount": 3, 15 | "date": "2008-01-02T00:00:00.000Z", 16 | "description": "g h i", 17 | "originalText": ["8/9 1/2 g h i $3"] 18 | }, 19 | { 20 | "amount": 4444.44, 21 | "date": "2005-04-05T00:00:00.000Z", 22 | "description": "l 6 7 8 9\no 1-2-3\np", 23 | "originalText": ["4/5 l 6 7 8 9 ", "o 1-2-3", "$4,444.44 ", "p "] 24 | } 25 | ], 26 | "incomes": [ 27 | { 28 | "amount": 8888.88, 29 | "date": "2005-06-07T00:00:00.000Z", 30 | "description": "s", 31 | "originalText": ["6/7 s -$8,888.88"] 32 | }, 33 | { 34 | "amount": 2, 35 | "date": "2005-09-01T00:00:00.000Z", 36 | "description": "w", 37 | "originalText": ["9/1 w -$2"] 38 | } 39 | ], 40 | "name": "Sanitized files/sample-files/sanitized/citi-costco-visa-credit/basic.json", 41 | "startDate": "2005-03-04T00:00:00.000Z", 42 | "yearPrefix": 20 43 | }, 44 | "packageVersion": "2.0.0", 45 | "parserType": "citi-costco-visa-credit", 46 | "text": [ 47 | "b", 48 | "d 1 account number ending in: 2", 49 | "billing period: 3/4/5-6/7/8", 50 | "", 51 | "k l", 52 | "o $9 ", 53 | "s 1/2/t $3 ", 54 | "w 4/5/6", 55 | "", 56 | "k", 57 | "x $7 c ", 58 | "k ", 59 | "r 8-9-1-2 ", 60 | "", 61 | "$3 s - ", 62 | "w", 63 | "z ", 64 | "b 4 ", 65 | "e 5-6 g", 66 | "j", 67 | "n", 68 | "p ", 69 | "", 70 | "t 7", 71 | "", 72 | "v", 73 | "x", 74 | "z 8", 75 | "9 c", 76 | "e 1-2 ", 77 | "g", 78 | "i j 3-4", 79 | "k m 5-6-7-8", 80 | "n-o-r 
9-1-2-3", 81 | "t", 82 | "", 83 | "v", 84 | "x", 85 | "a 4", 86 | "c $5 ", 87 | "", 88 | "Account Summary", 89 | "f g ", 90 | "h i", 91 | "k", 92 | "payments, credits and adjustments", 93 | "6/7 s -$8,888.88", 94 | "9/1 w -$2", 95 | "", 96 | "y ", 97 | "standard purchases", 98 | "3/4 5/6 c d e $7", 99 | "8/9 1/2 g h i $3", 100 | "4/5 l 6 7 8 9 ", 101 | "o 1-2-3", 102 | "$4,444.44 ", 103 | "p ", 104 | "", 105 | "r", 106 | "w $5", 107 | "", 108 | "y", 109 | "d $6", 110 | "", 111 | "7 e f-g-h", 112 | " l 8 $9 ", 113 | " p 1 $2 ", 114 | "", 115 | "s w 3 ", 116 | "a (b) j", 117 | "", 118 | "n ", 119 | " p q (r) u w", 120 | " x", 121 | " z $4,444.44 (a) $5", 122 | "b (c)", 123 | " d", 124 | " f $6 (g) $7", 125 | "h (i)", 126 | "", 127 | "m (n) y ", 128 | "(z) e (f) m (n ", 129 | "o p).", 130 | "", 131 | "r", 132 | "e ", 133 | "j", 134 | "z ", 135 | "f 9/1/2 n ", 136 | "p", 137 | "s ", 138 | "e ", 139 | "f(g) p", 140 | "s t ", 141 | "w", 142 | "x a ", 143 | "h m ", 144 | "q ", 145 | "c ", 146 | "g ", 147 | "(i j) . 
q ", 148 | "s", 149 | "i ", 150 | "o ", 151 | "p e ", 152 | "h ", 153 | "u ", 154 | "x ", 155 | "i j ", 156 | "k $4,444.44 (m ", 157 | "r", 158 | "t ", 159 | "x y e ", 160 | "h i)", 161 | "k o $5", 162 | "s ", 163 | "d f 6 g ", 164 | "j ", 165 | "y ", 166 | "c", 167 | "d m (n o) r ", 168 | "v ", 169 | "j k ", 170 | "y ", 171 | "z e ", 172 | "t ", 173 | "x ", 174 | "k ", 175 | "m", 176 | "y d ", 177 | "g ", 178 | "t ", 179 | "x ", 180 | "c n ", 181 | "p s ", 182 | "e", 183 | "f", 184 | "h", 185 | "k ", 186 | "o r v ", 187 | "y ", 188 | "z d p ", 189 | "t ", 190 | "f ", 191 | "j k ", 192 | "r", 193 | "w ", 194 | "d", 195 | "h ", 196 | "k m ", 197 | "v", 198 | "z ", 199 | "d h n ", 200 | "q ", 201 | "u a b c d", 202 | "f", 203 | "k 7 s ", 204 | "w x ", 205 | "n o ", 206 | "r ", 207 | "g ", 208 | "k ", 209 | "p", 210 | "s ", 211 | "f ", 212 | "j k ", 213 | "m", 214 | "q", 215 | "e ", 216 | "g ", 217 | "j", 218 | "m Account Summary", 219 | "b ", 220 | "e ", 221 | "n t ", 222 | "w x ", 223 | "m ", 224 | "s ", 225 | "y", 226 | "b ", 227 | "q ", 228 | "t ", 229 | "y", 230 | "a", 231 | "k", 232 | "", 233 | "l-8", 234 | "", 235 | "n ", 236 | "r ", 237 | "v ", 238 | "y ", 239 | "c ", 240 | "f ", 241 | "j ", 242 | "n o-p ", 243 | "s ", 244 | "u ", 245 | "x ", 246 | "a ", 247 | "e ", 248 | "h ", 249 | "j ", 250 | "l ", 251 | "9 n ", 252 | "o 1 p 2 ", 253 | "r 3", 254 | "t ", 255 | "w ", 256 | "a ", 257 | "e ", 258 | "f j ", 259 | "m n ", 260 | "s ", 261 | "x" 262 | ] 263 | } 264 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/citi-costco-visa-credit/no-payments.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/citi-costco-visa-credit/no-payments.json", 3 | "output": { 4 | "accountSuffix": "2", 5 | "endDate": "2008-06-07T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 8888.88, 9 | "date": "2005-06-07T00:00:00.000Z", 
10 | "description": "v w x", 11 | "originalText": ["4/5 6/7 v w x $8,888.88"] 12 | } 13 | ], 14 | "incomes": [], 15 | "name": "Sanitized files/sample-files/sanitized/citi-costco-visa-credit/no-payments.json", 16 | "startDate": "2005-03-04T00:00:00.000Z", 17 | "yearPrefix": 20 18 | }, 19 | "packageVersion": "2.0.0", 20 | "parserType": "citi-costco-visa-credit", 21 | "text": [ 22 | "b", 23 | "d 1 account number ending in: 2", 24 | "billing period: 3/4/5-6/7/8", 25 | "", 26 | "k l", 27 | "o $9 ", 28 | "s 1/2/t $3 ", 29 | "w 4/5/6", 30 | "f $7 j ", 31 | "", 32 | "x", 33 | "k $8 p ", 34 | "x ", 35 | "e 9-1-2-3 ", 36 | "", 37 | "k ", 38 | "n ", 39 | "o p ", 40 | "x", 41 | "z", 42 | "b ", 43 | "e", 44 | "g i (4)", 45 | "p ", 46 | "r ", 47 | "u ", 48 | "", 49 | "y 5", 50 | "", 51 | "a", 52 | "c", 53 | "e 6", 54 | "7 h", 55 | "j 8-9 ", 56 | "l", 57 | "n o 1-2", 58 | "p r 3-4-5-6", 59 | "s-t-w 7-8-9-1", 60 | "y", 61 | "", 62 | "a", 63 | "c", 64 | "f 2", 65 | "h $3 ", 66 | "", 67 | "Account Summary", 68 | "k l ", 69 | "m n", 70 | "p", 71 | "", 72 | "r ", 73 | "standard purchases", 74 | "4/5 6/7 v w x $8,888.88", 75 | "", 76 | "z", 77 | "e $9", 78 | "", 79 | "g", 80 | "l $1", 81 | "", 82 | "2 m n-o-p", 83 | " t 3 $4,444.44 ", 84 | " x 5 $6 ", 85 | "", 86 | "a e 7 ", 87 | "i (j) r", 88 | "", 89 | "v ", 90 | " x y (z) c e", 91 | " f", 92 | " h $8,888.88 (i) $9", 93 | "j (k)", 94 | " l", 95 | " n $1 (o) $2", 96 | "p (q)", 97 | "", 98 | "u (v) g ", 99 | "(h) m (n) u (v ", 100 | "w x).", 101 | "", 102 | "z", 103 | "n 4 ", 104 | "c ", 105 | "k", 106 | "x ", 107 | "c", 108 | "s ", 109 | "y 5/6/7 g ", 110 | "i", 111 | "l ", 112 | "x ", 113 | "y(z) i", 114 | "m ", 115 | "p", 116 | "t ", 117 | "f ", 118 | "j ", 119 | "v ", 120 | "z ", 121 | "(b c) . 
j ", 122 | "l", 123 | "b ", 124 | "h ", 125 | "x ", 126 | "a ", 127 | "n ", 128 | "q ", 129 | "c ", 130 | "d $9 (f ", 131 | "k", 132 | "m ", 133 | "x ", 134 | "a b)", 135 | "h $1", 136 | "l ", 137 | "y 2 ", 138 | "b ", 139 | "q ", 140 | "u", 141 | "k ", 142 | "o ", 143 | "f ", 144 | "w ", 145 | "c ", 146 | "s ", 147 | "w ", 148 | "f", 149 | "h", 150 | "k ", 151 | "m", 152 | "q ", 153 | "b ", 154 | "g ", 155 | "x ", 156 | "y", 157 | "k ", 158 | "r", 159 | "u ", 160 | "b", 161 | "e ", 162 | "i ", 163 | "r", 164 | "w ", 165 | "k ", 166 | "p ", 167 | "c", 168 | "g ", 169 | "l 3 t ", 170 | "y ", 171 | "p ", 172 | "t ", 173 | "i ", 174 | "l ", 175 | "q", 176 | "s", 177 | "f ", 178 | "j ", 179 | "l", 180 | "o ", 181 | "c ", 182 | "g ", 183 | "j", 184 | "m ", 185 | "z ", 186 | "e ", 187 | "t ", 188 | "x", 189 | "m ", 190 | "o ", 191 | "u", 192 | "x Account Summary", 193 | "o ", 194 | "r ", 195 | "w", 196 | "a ", 197 | "k", 198 | "p ", 199 | "s ", 200 | "v ", 201 | "x", 202 | "", 203 | "y-4", 204 | "", 205 | "a ", 206 | "f ", 207 | "j ", 208 | "m ", 209 | "q ", 210 | "u ", 211 | "w ", 212 | "z ", 213 | "c ", 214 | "g ", 215 | "j ", 216 | "l ", 217 | "n ", 218 | "5 p ", 219 | "q 6 r 7 ", 220 | "t 8", 221 | "v ", 222 | "y ", 223 | "c ", 224 | "g ", 225 | "l ", 226 | "o ", 227 | "t ", 228 | "x " 229 | ] 230 | } 231 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/citi-costco-visa-credit/post-date.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/citi-costco-visa-credit/post-date.json", 3 | "output": { 4 | "accountSuffix": "2", 5 | "endDate": "2008-06-07T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 1, 9 | "date": "2005-08-09T00:00:00.000Z", 10 | "description": "q r s", 11 | "originalText": ["6/7 8/9 q r s $1"] 12 | }, 13 | { 14 | "amount": 6, 15 | "date": "2005-04-05T00:00:00.000Z", 16 | "description": "v w x", 
17 | "originalText": ["2/3 4/5 v w x $6"] 18 | } 19 | ], 20 | "incomes": [ 21 | { 22 | "amount": 5, 23 | "date": "2005-03-04T00:00:00.000Z", 24 | "description": "j", 25 | "originalText": ["3/4 j -$5"] 26 | } 27 | ], 28 | "name": "Sanitized files/sample-files/sanitized/citi-costco-visa-credit/post-date.json", 29 | "startDate": "2005-03-04T00:00:00.000Z", 30 | "yearPrefix": 20 31 | }, 32 | "packageVersion": "2.0.0", 33 | "parserType": "citi-costco-visa-credit", 34 | "text": [ 35 | "b", 36 | "d 1 account number ending in: 2", 37 | "billing period: 3/4/5-6/7/8", 38 | "", 39 | "k l", 40 | "o $9 ", 41 | "s 1/2/t $3 ", 42 | "w 4/5/6", 43 | "", 44 | "k", 45 | "x $7 c ", 46 | "k ", 47 | "r 8-9-1-2 ", 48 | "", 49 | "$3 s - ", 50 | "w", 51 | "z ", 52 | "b", 53 | "e", 54 | "g ", 55 | "", 56 | "k 4", 57 | "", 58 | "m", 59 | "o", 60 | "q 5", 61 | "6 t", 62 | "v 7-8 ", 63 | "x", 64 | "z a 9-1", 65 | "b d 2-3-4-5", 66 | "e-f-i 6-7-8-9", 67 | "k", 68 | "", 69 | "m", 70 | "o", 71 | "r 1", 72 | "t $2 ", 73 | "", 74 | "Account Summary", 75 | "w x ", 76 | "y z", 77 | "b", 78 | "payments, credits and adjustments", 79 | "3/4 j -$5", 80 | "", 81 | "l ", 82 | "standard purchases", 83 | "6/7 8/9 q r s $1", 84 | "2/3 4/5 v w x $6", 85 | "", 86 | "z", 87 | "e $7", 88 | "", 89 | "g", 90 | "l $8,888.88", 91 | "", 92 | "9 m n-o-p", 93 | " t 1 $2 ", 94 | " x 3 $4,444.44 ", 95 | "", 96 | "a e 5 ", 97 | "i (j) r", 98 | "", 99 | "v ", 100 | " x y (z) c e", 101 | " f", 102 | " h $6 (i) $7", 103 | "j (k)", 104 | " l", 105 | " n $8,888.88 (o) $9", 106 | "p (q)", 107 | "", 108 | "u (v) g ", 109 | "(h) m (n) u (v ", 110 | "w x).", 111 | "", 112 | "z", 113 | "l ", 114 | "n $2 s 3 v ", 115 | "j ", 116 | "k", 117 | "x ", 118 | "c", 119 | "p ", 120 | "t", 121 | "j ", 122 | "p 4/5/6 x ", 123 | "z", 124 | "c ", 125 | "o ", 126 | "p(q) z", 127 | "d ", 128 | "g", 129 | "k ", 130 | "w ", 131 | "a ", 132 | "m ", 133 | "q ", 134 | "(s t) . 
a ", 135 | "c", 136 | "s ", 137 | "y ", 138 | "o ", 139 | "r ", 140 | "e ", 141 | "h ", 142 | "t ", 143 | "u $8,888.88 (w ", 144 | "b", 145 | "d ", 146 | "o ", 147 | "r s)", 148 | "y $9", 149 | "c ", 150 | "p 1 ", 151 | "s ", 152 | "h ", 153 | "l", 154 | "b ", 155 | "f ", 156 | "w ", 157 | "n ", 158 | "t ", 159 | "j ", 160 | "n ", 161 | "w", 162 | "y", 163 | "b ", 164 | "d", 165 | "h ", 166 | "s ", 167 | "x ", 168 | "o ", 169 | "p", 170 | "b ", 171 | "i", 172 | "l ", 173 | "s", 174 | "v ", 175 | "z ", 176 | "i", 177 | "n ", 178 | "b ", 179 | "g ", 180 | "t", 181 | "x ", 182 | "c 2 k ", 183 | "p ", 184 | "g ", 185 | "k ", 186 | "z ", 187 | "c ", 188 | "h", 189 | "j", 190 | "w ", 191 | "a ", 192 | "c", 193 | "f ", 194 | "t ", 195 | "x ", 196 | "a", 197 | "d ", 198 | "q ", 199 | "v ", 200 | "k ", 201 | "o", 202 | "d ", 203 | "f ", 204 | "l", 205 | "o Account Summary", 206 | "f ", 207 | "i ", 208 | "n", 209 | "r ", 210 | "b", 211 | "g ", 212 | "j ", 213 | "m ", 214 | "o", 215 | "", 216 | "p-3", 217 | "", 218 | "r ", 219 | "w ", 220 | "a ", 221 | "d ", 222 | "h ", 223 | "l ", 224 | "n ", 225 | "q ", 226 | "t ", 227 | "x ", 228 | "a ", 229 | "c ", 230 | "e ", 231 | "4 g ", 232 | "h 5 i 6 ", 233 | "k 7", 234 | "m ", 235 | "p ", 236 | "t ", 237 | "x ", 238 | "c ", 239 | "f ", 240 | "k ", 241 | "o " 242 | ] 243 | } 244 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/paypal/basic.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/paypal/basic.json", 3 | "output": { 4 | "accountSuffix": "l", 5 | "endDate": "0004-05-03T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 3, 9 | "baseAmount": 1, 10 | "date": "0009-07-08T00:00:00.000Z", 11 | "description": "h\nj k-4 5 USD\nn", 12 | "fees": 2, 13 | "originalText": ["7/8/9 h USD -1 2 -3", " j k-4 5 USD", "n"] 14 | }, 15 | { 16 | "amount": 8888.88, 17 | "baseAmount": 6, 18 | 
"date": "0005-03-04T00:00:00.000Z", 19 | "description": "b\ng -\nh i-9\nk", 20 | "fees": 7, 21 | "originalText": ["3/4/5 b USD -6 7 -8,888.88", " g -", " h i-9", "k"] 22 | }, 23 | { 24 | "amount": 6, 25 | "baseAmount": 4444.44, 26 | "date": "0003-01-02T00:00:00.000Z", 27 | "description": "n\np q-7 8,888.88 USD\nt", 28 | "fees": 5, 29 | "originalText": ["1/2/3 n USD -4,444.44 5 -6", " p q-7 8,888.88 USD", "t"] 30 | }, 31 | { 32 | "amount": 5, 33 | "baseAmount": 3, 34 | "date": "0002-09-01T00:00:00.000Z", 35 | "description": "x\nz a-6 7 USD\nd", 36 | "fees": 4444.44, 37 | "originalText": ["9/1/2 x USD -3 4,444.44 -5", " z a-6 7 USD", "d"] 38 | }, 39 | { 40 | "amount": 4444.44, 41 | "baseAmount": 2, 42 | "date": "0001-08-09T00:00:00.000Z", 43 | "description": "i\nk l-5 6 USD\no", 44 | "fees": 3, 45 | "originalText": ["8/9/1 i USD -2 3 -4,444.44", " k l-5 6 USD", "o"] 46 | } 47 | ], 48 | "incomes": [ 49 | { 50 | "amount": 2, 51 | "baseAmount": 9, 52 | "date": "0008-06-07T00:00:00.000Z", 53 | "description": "r\nu\nw", 54 | "fees": 1, 55 | "originalText": ["6/7/8 r USD 9 1 2", "u", "w"] 56 | } 57 | ], 58 | "name": "Sanitized files/sample-files/sanitized/paypal/basic.json", 59 | "startDate": "0002-05-01T00:00:00.000Z", 60 | "yearPrefix": 20 61 | }, 62 | "packageVersion": "2.0.0", 63 | "parserType": "paypal", 64 | "text": [ 65 | "b", 66 | "d", 67 | "", 68 | "statement period i", 69 | "j 1 2 - k 3 4 l", 70 | "", 71 | "n", 72 | " ", 73 | " ", 74 | " ", 75 | " ", 76 | " ", 77 | "p", 78 | " ", 79 | "USD", 80 | "", 81 | "s 5", 82 | "", 83 | "u 6", 84 | "", 85 | "w", 86 | "", 87 | "DATE DESCRIPTION CURRENCY AMOUNT FEES TOTAL", 88 | "", 89 | "7/8/9 h USD -1 2 -3", 90 | " j k-4 5 USD", 91 | "n", 92 | "", 93 | "6/7/8 r USD 9 1 2", 94 | "u", 95 | "w", 96 | "", 97 | "3/4/5 b USD -6 7 -8,888.88", 98 | " g -", 99 | " h i-9", 100 | "k", 101 | "", 102 | "1/2/3 n USD -4,444.44 5 -6", 103 | " p q-7 8,888.88 USD", 104 | "t", 105 | "", 106 | "9/1/2 x USD -3 4,444.44 -5", 107 | " z a-6 7 
USD", 108 | "d", 109 | "", 110 | "8/9/1 i USD -2 3 -4,444.44", 111 | " k l-5 6 USD", 112 | "o", 113 | "", 114 | " ", 115 | " ", 116 | " ", 117 | " ", 118 | "u", 119 | "g", 120 | " ", 121 | " ", 122 | " ", 123 | "a", 124 | " ", 125 | " ", 126 | " ", 127 | "j k-o p-u USD f", 128 | "k l-USD n", 129 | " ", 130 | " ", 131 | " ", 132 | "d (7-8-9), e (2-3-4) i (l", 133 | "o 5 q 6-7).", 134 | " ", 135 | " ", 136 | " ", 137 | "h (9-1-2) l (q", 138 | "r 3 t 4-5).", 139 | " ", 140 | " ", 141 | " ", 142 | "you must notify us no later than 7 s 8-x", 143 | "k 9 b", 144 | "1 n 2 s (v 3 d e). h", 145 | "", 146 | "Page 1", 147 | "k", 148 | "m", 149 | "", 150 | "statement period r", 151 | "s 6 7 - t 8 9 u", 152 | "", 153 | "w", 154 | "z", 155 | "1 i", 156 | " ", 157 | " ", 158 | " ", 159 | "l m-u v-f 2-3-4-5 (k", 160 | "m n-w x).", 161 | " ", 162 | "", 163 | "Page 2" 164 | ] 165 | } 166 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/paypal/big-expense.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/paypal/big-expense.json", 3 | "output": { 4 | "accountSuffix": "l", 5 | "endDate": "0004-05-03T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 3, 9 | "baseAmount": 1, 10 | "date": "0009-07-08T00:00:00.000Z", 11 | "description": "g\nl -\nm n-4 5 USD\nq", 12 | "fees": 2, 13 | "originalText": ["7/8/9 g USD -1 2 -3", " l -", " m n-4 5 USD", "q"] 14 | }, 15 | { 16 | "amount": 2, 17 | "baseAmount": 9, 18 | "date": "0008-06-07T00:00:00.000Z", 19 | "description": "t\ny -\nz a-3 4,444.44\nUSD\nd", 20 | "fees": 1, 21 | "originalText": [ 22 | "6/7/8 t USD -9 1 -2", 23 | " y -", 24 | " z a-3 4,444.44", 25 | "USD", 26 | "d" 27 | ] 28 | }, 29 | { 30 | "amount": 1, 31 | "baseAmount": 8888.88, 32 | "date": "0007-05-06T00:00:00.000Z", 33 | "description": "i\nm\nq -\nr s-2 3 USD\nv", 34 | "fees": 9, 35 | "originalText": [ 36 | "5/6/7 i USD 
-8,888.88 9 -1", 37 | "m", 38 | " q -", 39 | " r s-2 3 USD", 40 | "v" 41 | ] 42 | }, 43 | { 44 | "amount": 9, 45 | "baseAmount": 7, 46 | "date": "0006-04-05T00:00:00.000Z", 47 | "description": "z\ne -\nf g-1 2 USD\nj", 48 | "fees": 8888.88, 49 | "originalText": ["4/5/6 z USD -7 8,888.88 -9", " e -", " f g-1 2 USD", "j"] 50 | } 51 | ], 52 | "incomes": [], 53 | "name": "Sanitized files/sample-files/sanitized/paypal/big-expense.json", 54 | "startDate": "0002-05-01T00:00:00.000Z", 55 | "yearPrefix": 20 56 | }, 57 | "packageVersion": "2.0.0", 58 | "parserType": "paypal", 59 | "text": [ 60 | "b", 61 | "d", 62 | "", 63 | "statement period i", 64 | "j 1 2 - k 3 4 l", 65 | "", 66 | "n", 67 | " ", 68 | " ", 69 | " ", 70 | " ", 71 | " ", 72 | "p", 73 | " ", 74 | "USD", 75 | "", 76 | "s 5", 77 | "", 78 | "u 6", 79 | "", 80 | "w", 81 | "", 82 | "DATE DESCRIPTION CURRENCY AMOUNT FEES TOTAL", 83 | "", 84 | "7/8/9 g USD -1 2 -3", 85 | " l -", 86 | " m n-4 5 USD", 87 | "q", 88 | "", 89 | "6/7/8 t USD -9 1 -2", 90 | " y -", 91 | " z a-3 4,444.44", 92 | "USD", 93 | "d", 94 | "", 95 | "5/6/7 i USD -8,888.88 9 -1", 96 | "m", 97 | " q -", 98 | " r s-2 3 USD", 99 | "v", 100 | "", 101 | "4/5/6 z USD -7 8,888.88 -9", 102 | " e -", 103 | " f g-1 2 USD", 104 | "j", 105 | "", 106 | " ", 107 | " ", 108 | " ", 109 | " ", 110 | "p", 111 | "b", 112 | " ", 113 | " ", 114 | " ", 115 | "v", 116 | " ", 117 | " ", 118 | " ", 119 | "e f-j k-p USD a", 120 | "f g-USD i", 121 | " ", 122 | " ", 123 | " ", 124 | "y (3-4-5), z (7-8-9) d (g", 125 | "j 1 l 2-3).", 126 | " ", 127 | " ", 128 | " ", 129 | "c (5-6-7) g (l", 130 | "m 8 o 9-1).", 131 | " ", 132 | " ", 133 | " ", 134 | "you must notify us no later than 3 n 4-s", 135 | "f 5 w", 136 | "6 i 7 n (q 8 y z). 
c", 137 | "f", 138 | "1 o", 139 | " ", 140 | " ", 141 | " ", 142 | "r s-a b-l 2-3-4-5 (q", 143 | "s t-c d).", 144 | " ", 145 | "", 146 | "Page 1" 147 | ] 148 | } 149 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/paypal/long.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/paypal/long.json", 3 | "output": { 4 | "accountSuffix": "l", 5 | "endDate": "0004-05-03T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 1, 9 | "baseAmount": 8888.88, 10 | "date": "0007-05-06T00:00:00.000Z", 11 | "description": "l\nq -\nr s-2 3 USD\nv", 12 | "fees": 9, 13 | "originalText": ["5/6/7 l USD -8,888.88 9 -1", " q -", " r s-2 3 USD", "v"] 14 | }, 15 | { 16 | "amount": 6, 17 | "baseAmount": 4444.44, 18 | "date": "0003-01-02T00:00:00.000Z", 19 | "description": "h\nm -\nn o-7\nq", 20 | "fees": 5, 21 | "originalText": ["1/2/3 h USD -4,444.44 5 -6", " m -", " n o-7", "q"] 22 | }, 23 | { 24 | "amount": 4444.44, 25 | "baseAmount": 2, 26 | "date": "0001-08-09T00:00:00.000Z", 27 | "description": "w\ny\nz a-5 6 USD\nd", 28 | "fees": 3, 29 | "originalText": ["8/9/1 w USD -2 3 -4,444.44", "y", " z a-5 6 USD", "d"] 30 | }, 31 | { 32 | "amount": 3, 33 | "baseAmount": 1, 34 | "date": "0009-07-08T00:00:00.000Z", 35 | "description": "j\nl\nn", 36 | "fees": 2, 37 | "originalText": ["7/8/9 j USD -1 2 -3", "l", "n"] 38 | }, 39 | { 40 | "amount": 9, 41 | "baseAmount": 7, 42 | "date": "0006-04-05T00:00:00.000Z", 43 | "description": "s\nu v-1 2 USD\ny", 44 | "fees": 8888.88, 45 | "originalText": ["4/5/6 s USD -7 8,888.88 -9", " u v-1 2 USD", "y"] 46 | }, 47 | { 48 | "amount": 8888.88, 49 | "baseAmount": 6, 50 | "date": "0005-03-04T00:00:00.000Z", 51 | "description": "c\ne f-9 1 USD\ni", 52 | "fees": 7, 53 | "originalText": ["3/4/5 c USD -6 7 -8,888.88", " e f-9 1 USD", "i"] 54 | }, 55 | { 56 | "amount": 7, 57 | "baseAmount": 5, 58 | "date": 
"0004-02-03T00:00:00.000Z", 59 | "description": "n\np q-8 9 USD\nt", 60 | "fees": 6, 61 | "originalText": ["2/3/4 n USD -5 6 -7", " p q-8 9 USD", "t"] 62 | }, 63 | { 64 | "amount": 6, 65 | "baseAmount": 4444.44, 66 | "date": "0003-01-02T00:00:00.000Z", 67 | "description": "y\na b-7 8,888.88 USD\ne", 68 | "fees": 5, 69 | "originalText": ["1/2/3 y USD -4,444.44 5 -6", " a b-7 8,888.88 USD", "e"] 70 | }, 71 | { 72 | "amount": 5, 73 | "baseAmount": 3, 74 | "date": "0002-09-01T00:00:00.000Z", 75 | "description": "a\nf -\ng h-6\nj", 76 | "fees": 4444.44, 77 | "originalText": ["9/1/2 a USD -3 4,444.44 -5", " f -", " g h-6", "j"] 78 | } 79 | ], 80 | "incomes": [ 81 | { 82 | "amount": 9, 83 | "baseAmount": 7, 84 | "date": "0006-04-05T00:00:00.000Z", 85 | "description": "z\nc", 86 | "fees": 8888.88, 87 | "originalText": ["4/5/6 z USD 7 8,888.88 9", "c"] 88 | }, 89 | { 90 | "amount": 5, 91 | "baseAmount": 3, 92 | "date": "0002-09-01T00:00:00.000Z", 93 | "description": "l\nn\no p-6\nr", 94 | "fees": 4444.44, 95 | "originalText": ["9/1/2 l USD 3 4,444.44 5", "n", " o p-6", "r"] 96 | }, 97 | { 98 | "amount": 8888.88, 99 | "baseAmount": 6, 100 | "date": "0005-03-04T00:00:00.000Z", 101 | "description": "s\nv", 102 | "fees": 7, 103 | "originalText": ["3/4/5 s USD 6 7 8,888.88", "v"] 104 | } 105 | ], 106 | "name": "Sanitized files/sample-files/sanitized/paypal/long.json", 107 | "startDate": "0002-05-01T00:00:00.000Z", 108 | "yearPrefix": 20 109 | }, 110 | "packageVersion": "2.0.0", 111 | "parserType": "paypal", 112 | "text": [ 113 | "b", 114 | "d", 115 | "", 116 | "statement period i", 117 | "j 1 2 - k 3 4 l", 118 | "", 119 | "v", 120 | "", 121 | " ", 122 | " ", 123 | " ", 124 | "x", 125 | " ", 126 | " ", 127 | " ", 128 | " ", 129 | " ", 130 | " ", 131 | " ", 132 | "", 133 | "z", 134 | "", 135 | "DATE DESCRIPTION CURRENCY AMOUNT FEES TOTAL", 136 | "", 137 | "5/6/7 l USD -8,888.88 9 -1", 138 | " q -", 139 | " r s-2 3 USD", 140 | "v", 141 | "", 142 | "4/5/6 z USD 7 8,888.88 9", 143 | 
"c", 144 | "", 145 | "1/2/3 h USD -4,444.44 5 -6", 146 | " m -", 147 | " n o-7", 148 | "q", 149 | "", 150 | "8/9/1 w USD -2 3 -4,444.44", 151 | "y", 152 | " z a-5 6 USD", 153 | "d", 154 | "", 155 | "7/8/9 j USD -1 2 -3", 156 | "l", 157 | "n", 158 | "", 159 | "4/5/6 s USD -7 8,888.88 -9", 160 | " u v-1 2 USD", 161 | "y", 162 | "", 163 | "3/4/5 c USD -6 7 -8,888.88", 164 | " e f-9 1 USD", 165 | "i", 166 | "", 167 | "2/3/4 n USD -5 6 -7", 168 | " p q-8 9 USD", 169 | "t", 170 | "", 171 | "1/2/3 y USD -4,444.44 5 -6", 172 | " a b-7 8,888.88 USD", 173 | "e", 174 | "", 175 | "9/1/2 l USD 3 4,444.44 5", 176 | "n", 177 | " o p-6", 178 | "r", 179 | "", 180 | "Page 1", 181 | "u", 182 | "w", 183 | "", 184 | "statement period b", 185 | "c 8 9 - d 1 2 e", 186 | "", 187 | "g", 188 | "", 189 | "i", 190 | "", 191 | "DATE DESCRIPTION CURRENCY AMOUNT FEES TOTAL", 192 | "", 193 | "3/4/5 s USD 6 7 8,888.88", 194 | "v", 195 | "", 196 | "9/1/2 a USD -3 4,444.44 -5", 197 | " f -", 198 | " g h-6", 199 | "j", 200 | "", 201 | " ", 202 | " ", 203 | " ", 204 | " ", 205 | " ", 206 | " ", 207 | " ", 208 | " ", 209 | " ", 210 | " ", 211 | " ", 212 | " ", 213 | " ", 214 | " ", 215 | " ", 216 | " ", 217 | " ", 218 | " ", 219 | " ", 220 | " ", 221 | " ", 222 | " ", 223 | " ", 224 | " ", 225 | " ", 226 | " ", 227 | " ", 228 | " ", 229 | " ", 230 | " ", 231 | " ", 232 | " ", 233 | " ", 234 | " ", 235 | " ", 236 | " ", 237 | " ", 238 | " ", 239 | " ", 240 | " ", 241 | " ", 242 | " ", 243 | " ", 244 | " ", 245 | " ", 246 | " ", 247 | " ", 248 | " ", 249 | " ", 250 | " ", 251 | " ", 252 | " ", 253 | " ", 254 | " ", 255 | " ", 256 | " ", 257 | "d", 258 | " ", 259 | " ", 260 | " ", 261 | "t (7-8-9), u (2-3-4) y (b", 262 | "e 5 g 6-7).", 263 | " ", 264 | " ", 265 | " ", 266 | "x (9-1-2) b (g", 267 | "h 3 j 4-5).", 268 | " ", 269 | " ", 270 | " ", 271 | "you must notify us no later than 7 i 8-n", 272 | "a 9 r", 273 | "1 d 2 i (l 3 t u). 
x", 274 | "a", 275 | "5 j", 276 | " ", 277 | " ", 278 | " ", 279 | "m n-v w-g 6-7-8-9 (l", 280 | "n o-x y).", 281 | " ", 282 | "", 283 | "Page 2" 284 | ] 285 | } 286 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/paypal/nothing.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/paypal/nothing.json", 3 | "output": { 4 | "accountSuffix": "l", 5 | "endDate": "0004-05-03T00:00:00.000Z", 6 | "expenses": [], 7 | "incomes": [], 8 | "name": "Sanitized files/sample-files/sanitized/paypal/nothing.json", 9 | "startDate": "0002-05-01T00:00:00.000Z", 10 | "yearPrefix": 20 11 | }, 12 | "packageVersion": "2.0.0", 13 | "parserType": "paypal", 14 | "text": [ 15 | "b", 16 | "d", 17 | "", 18 | "statement period i", 19 | "j 1 2 - k 3 4 l", 20 | "", 21 | " ", 22 | " ", 23 | " ", 24 | " ", 25 | " ", 26 | " ", 27 | " ", 28 | " ", 29 | " ", 30 | " ", 31 | " ", 32 | " ", 33 | "b", 34 | " ", 35 | " m", 36 | " ", 37 | " ", 38 | " ", 39 | " ", 40 | " ", 41 | " ", 42 | " ", 43 | " ", 44 | " ", 45 | " ", 46 | " ", 47 | " ", 48 | " ", 49 | " ", 50 | " ", 51 | " ", 52 | " ", 53 | " ", 54 | " ", 55 | " ", 56 | " ", 57 | " ", 58 | " ", 59 | " ", 60 | " ", 61 | " ", 62 | " ", 63 | " ", 64 | " ", 65 | " ", 66 | " ", 67 | " ", 68 | " ", 69 | " ", 70 | " ", 71 | " ", 72 | " ", 73 | " ", 74 | " ", 75 | " ", 76 | " ", 77 | " ", 78 | " ", 79 | " ", 80 | " ", 81 | " ", 82 | " ", 83 | " ", 84 | " ", 85 | " ", 86 | " ", 87 | " ", 88 | " ", 89 | " ", 90 | " ", 91 | " ", 92 | " ", 93 | " ", 94 | " ", 95 | " ", 96 | " ", 97 | " ", 98 | "v w-a b-g USD r", 99 | "w x-USD z", 100 | " ", 101 | " ", 102 | " ", 103 | "p (5-6-7), q (9-1-2) u (x", 104 | "a 3 c 4-5).", 105 | " ", 106 | " ", 107 | " ", 108 | "t (7-8-9) x (c", 109 | "d 1 f 2-3).", 110 | " ", 111 | " ", 112 | " ", 113 | "you must notify us no later than 5 e 6-j", 114 | "w 7 n", 115 | "8 z 9 e (h 1 p 
q). t", 116 | "w", 117 | "3 f", 118 | " ", 119 | " ", 120 | " ", 121 | "i j-r s-c 4-5-6-7 (h", 122 | "j k-t u).", 123 | " ", 124 | "", 125 | "Page 1" 126 | ] 127 | } 128 | -------------------------------------------------------------------------------- /files/sample-files/sanitized/usaa-bank/basic.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sanitized files/sample-files/sanitized/usaa-bank/basic.json", 3 | "output": { 4 | "accountSuffix": "678", 5 | "endDate": "2005-03-04T00:00:00.000Z", 6 | "expenses": [ 7 | { 8 | "amount": 3, 9 | "date": "2005-01-02T00:00:00.000Z", 10 | "description": "o\ns 4", 11 | "originalText": ["1/2 3 o", " s 4"] 12 | }, 13 | { 14 | "amount": 7, 15 | "date": "2003-05-06T00:00:00.000Z", 16 | "description": "v", 17 | "originalText": ["5/6 7 v"] 18 | }, 19 | { 20 | "amount": 1, 21 | "date": "2003-08-09T00:00:00.000Z", 22 | "description": "z\nc\nd f 2", 23 | "originalText": ["8/9 1 z", " c", " d f 2"] 24 | }, 25 | { 26 | "amount": 5, 27 | "date": "2005-03-04T00:00:00.000Z", 28 | "description": "h 6\nl m", 29 | "originalText": ["3/4 5 h 6", " l m"] 30 | }, 31 | { 32 | "amount": 9, 33 | "date": "2003-07-08T00:00:00.000Z", 34 | "description": "o 1\ns t", 35 | "originalText": ["7/8 9 o 1", " s t"] 36 | }, 37 | { 38 | "amount": 4444.44, 39 | "date": "2005-02-03T00:00:00.000Z", 40 | "description": "v 5\nw y z", 41 | "originalText": ["2/3 4,444.44 v 5", " w y z"] 42 | }, 43 | { 44 | "amount": 8888.88, 45 | "date": "2003-06-07T00:00:00.000Z", 46 | "description": "b 9\nd e f", 47 | "originalText": ["6/7 8,888.88 b 9", " d e f"] 48 | }, 49 | { 50 | "amount": 3, 51 | "date": "2005-01-02T00:00:00.000Z", 52 | "description": "j\no 5", 53 | "originalText": ["1/2 3 j", " o 5"] 54 | }, 55 | { 56 | "amount": 4444.44, 57 | "date": "2005-02-03T00:00:00.000Z", 58 | "description": "j 5\nk m", 59 | "originalText": ["2/3 4,444.44 j 5", " k m"] 60 | }, 61 | { 62 | "amount": 8888.88, 63 | "date": 
"2003-06-07T00:00:00.000Z", 64 | "description": "o 9\ns t", 65 | "originalText": ["6/7 8,888.88 o 9", " s t"] 66 | }, 67 | { 68 | "amount": 3, 69 | "date": "2005-01-02T00:00:00.000Z", 70 | "description": "v 4\nz a", 71 | "originalText": ["1/2 3 v 4", " z a"] 72 | }, 73 | { 74 | "amount": 7, 75 | "date": "2003-05-06T00:00:00.000Z", 76 | "description": "e\ni 8", 77 | "originalText": ["5/6 7 e", " i 8"] 78 | }, 79 | { 80 | "amount": 2, 81 | "date": "2002-09-01T00:00:00.000Z", 82 | "description": "m\np\nq s 3", 83 | "originalText": ["9/1 2 m", " p", " q s 3"] 84 | }, 85 | { 86 | "amount": 6, 87 | "date": "2003-04-05T00:00:00.000Z", 88 | "description": "u 7\nv x y", 89 | "originalText": ["4/5 6 u 7", " v x y"] 90 | }, 91 | { 92 | "amount": 1, 93 | "date": "2003-08-09T00:00:00.000Z", 94 | "description": "a 2\nb d e", 95 | "originalText": ["8/9 1 a 2", " b d e"] 96 | }, 97 | { 98 | "amount": 5, 99 | "date": "2005-03-04T00:00:00.000Z", 100 | "description": "g 6\nh/l", 101 | "originalText": ["3/4 5 g 6", " h/l"] 102 | }, 103 | { 104 | "amount": 9, 105 | "date": "2003-07-08T00:00:00.000Z", 106 | "description": "n 1\np q r", 107 | "originalText": ["7/8 9 n 1", " p q r"] 108 | } 109 | ], 110 | "incomes": [ 111 | { 112 | "amount": 2, 113 | "date": "2002-09-01T00:00:00.000Z", 114 | "description": "d\nFROM g\nh j 3", 115 | "from": "g", 116 | "originalText": ["9/1 2 d", " FROM g", " h j 3"] 117 | }, 118 | { 119 | "amount": 6, 120 | "date": "2003-04-05T00:00:00.000Z", 121 | "description": "l 7\nm n o", 122 | "originalText": ["4/5 6 l 7", " m n o"] 123 | }, 124 | { 125 | "amount": 1, 126 | "date": "2003-08-09T00:00:00.000Z", 127 | "description": "q 2\nw", 128 | "originalText": ["8/9 1 q 2", " w"] 129 | }, 130 | { 131 | "amount": 5, 132 | "date": "2005-03-04T00:00:00.000Z", 133 | "description": "y 6\ne", 134 | "originalText": ["3/4 5 y 6", " e"] 135 | }, 136 | { 137 | "amount": 9, 138 | "date": "2003-07-08T00:00:00.000Z", 139 | "description": "g", 140 | "originalText": ["7/8 9 g"] 
141 | } 142 | ], 143 | "name": "Sanitized files/sample-files/sanitized/usaa-bank/basic.json", 144 | "startDate": "2002-09-01T00:00:00.000Z", 145 | "yearPrefix": 20 146 | }, 147 | "packageVersion": "2.0.0", 148 | "parserType": "usaa-bank", 149 | "text": [ 150 | "a", 151 | "b", 152 | "c", 153 | "d", 154 | "", 155 | "e 1", 156 | "g", 157 | "2 l", 158 | "o 3", 159 | "", 160 | " 4", 161 | " 5", 162 | "", 163 | "ACCOUNT NUMBER ACCOUNT TYPE STATEMENT PERIOD", 164 | "", 165 | "6-7-8 9/1/2 -3/4/5", 166 | " x", 167 | "", 168 | "z", 169 | "b c e", 170 | "f h j", 171 | "k", 172 | "m p q u v", 173 | "w", 174 | " 6 7 8,888.88 9 1 2 3", 175 | "j 4 q", 176 | "", 177 | "r s", 178 | "u (v) w (x)", 179 | "y z", 180 | "b", 181 | " 5 6", 182 | "e", 183 | " 7 8,888.88", 184 | "g h/t", 185 | "", 186 | " DEPOSITS AND OTHER CREDITS", 187 | "z", 188 | "9/1 2 d", 189 | " FROM g", 190 | " h j 3", 191 | "4/5 6 l 7", 192 | " m n o", 193 | "8/9 1 q 2", 194 | " w", 195 | "3/4 5 y 6", 196 | " e", 197 | "7/8 9 g", 198 | "", 199 | " other debits", 200 | "k", 201 | "1/2 3 o", 202 | " s 4", 203 | "5/6 7 v", 204 | "8/9 1 z", 205 | " c", 206 | " d f 2", 207 | "3/4 5 h 6", 208 | " l m", 209 | "7/8 9 o 1", 210 | " s t", 211 | "2/3 4,444.44 v 5", 212 | " w y z", 213 | "6/7 8,888.88 b 9", 214 | " d e f", 215 | "1/2 3 j", 216 | "4-k", 217 | " o 5", 218 | "p", 219 | "t", 220 | "6 v", 221 | "y 7-8", 222 | "9-1-2", 223 | "", 224 | "k 3 q ", 225 | "y", 226 | "", 227 | "s", 228 | "s", 229 | "j 4 m", 230 | "y", 231 | ".", 232 | "f (g h).", 233 | ".", 234 | "b", 235 | "l", 236 | ".", 237 | "u", 238 | "", 239 | "k 9 r", 240 | "t", 241 | "w", 242 | "", 243 | "l", 244 | "", 245 | "n", 246 | "p", 247 | "(u", 248 | "y z)", 249 | "(1) c", 250 | "d e", 251 | "$", 252 | "(h i)", 253 | "", 254 | "(2) n", 255 | "o", 256 | "q (r s)", 257 | "", 258 | "$", 259 | "(3) t", 260 | "", 261 | "(4) x", 262 | "-", 263 | "y (z a)", 264 | "", 265 | "$", 266 | "(5) d", 267 | "", 268 | "f", 269 | "", 270 | "(6) i", 271 | "$", 272 | "", 273 
| "(7) m", 274 | "r", 275 | "w", 276 | "x", 277 | "y", 278 | "z (a b)", 279 | "", 280 | "(8) f", 281 | "g", 282 | "h", 283 | "j (k l)", 284 | "", 285 | "(9) o", 286 | "-", 287 | "p", 288 | "(q r)", 289 | "", 290 | "$", 291 | "(1) u", 292 | "v", 293 | "w $", 294 | "f", 295 | "", 296 | "g 2 i 3 u", 297 | "", 298 | "-z", 299 | "-e", 300 | "-k", 301 | "-q", 302 | "", 303 | "e", 304 | "4-5", 305 | "f", 306 | "g", 307 | "h", 308 | "i", 309 | "j", 310 | "", 311 | "k 6", 312 | "m", 313 | "7 r", 314 | "u 8", 315 | "", 316 | " 9", 317 | " 1", 318 | "", 319 | "ACCOUNT NUMBER ACCOUNT TYPE STATEMENT PERIOD", 320 | "", 321 | "2-3-4 5/6/7 -8/9/1", 322 | " d", 323 | "", 324 | " other debits", 325 | "h", 326 | "2/3 4,444.44 j 5", 327 | " k m", 328 | "6/7 8,888.88 o 9", 329 | " s t", 330 | "1/2 3 v 4", 331 | " z a", 332 | "5/6 7 e", 333 | " i 8", 334 | "9/1 2 m", 335 | " p", 336 | " q s 3", 337 | "4/5 6 u 7", 338 | " v x y", 339 | "8/9 1 a 2", 340 | " b d e", 341 | "3/4 5 g 6", 342 | " h/l", 343 | "7/8 9 n 1", 344 | " p q r", 345 | "", 346 | " ACCOUNT BALANCE SUMMARY", 347 | " v w", 348 | " 2/3 4,444.44 5/6 7", 349 | " 8/9 1 2/3 4,444.44", 350 | " 5/6 7 8/9 1", 351 | " 2/3 4,444.44 5/6 7", 352 | " 8/9 1 2/3 4,444.44", 353 | " 5/6 7 8/9 1", 354 | "", 355 | " e h p", 356 | " z", 357 | " 2 i j", 358 | " p 3", 359 | "", 360 | "4-q", 361 | "r" 362 | ] 363 | } 364 | -------------------------------------------------------------------------------- /files/temp-output/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "statement-parser", 3 | "version": "2.0.1", 4 | "description": "Parse bank and credit card statements.", 5 | "keywords": [ 6 | "pdf", 7 | "bank", 8 | "credit card", 9 | "finances", 10 | "finance", 11 | "financial", 12 | 
"pdf to json", 13 | "citi", 14 | "chase", 15 | "usaa", 16 | "costco", 17 | "prime", 18 | "paypal", 19 | "parser", 20 | "statement", 21 | "statements" 22 | ], 23 | "homepage": "https://github.com/electrovir/statement-parser", 24 | "bugs": { 25 | "url": "https://github.com/electrovir/statement-parser/issues" 26 | }, 27 | "repository": { 28 | "type": "git", 29 | "url": "https://github.com/electrovir/statement-parser" 30 | }, 31 | "license": "MIT", 32 | "author": { 33 | "name": "electrovir", 34 | "url": "https://github.com/electrovir" 35 | }, 36 | "main": "dist/src/index.js", 37 | "typings": "dist/src/index.d.ts", 38 | "scripts": { 39 | "format": "virmator format", 40 | "prepublishOnly": "npm run test:full", 41 | "sanitize": "virmator compile && node dist/sanitizer/sanitize-for-test-file-cli.js", 42 | "sanitize:all": "./bulk-sanitize.sh files/downloads", 43 | "sanitize:no-compile": "node dist/sanitizer/sanitize-for-test-file-cli.js", 44 | "spellcheck": "virmator spellcheck", 45 | "test": "virmator test", 46 | "test:file": "./test-specific-file.sh", 47 | "test:full": "npm run test && npm run spellcheck && npm run format check && npm run update-docs -- --check", 48 | "update-docs": "virmator code-in-markdown README.md" 49 | }, 50 | "dependencies": { 51 | "augment-vir": "^1.3.0", 52 | "fs-extra": "^10.0.0", 53 | "fsm-vir": "^1.0.1", 54 | "pdf-text-reader": "^3.0.0", 55 | "pdfjs-dist": "2.10.377" 56 | }, 57 | "devDependencies": { 58 | "@types/fs-extra": "^9.0.13", 59 | "@types/node": "^16.11.4", 60 | "@types/pdfjs-dist": "^2.7.5", 61 | "@types/pdfkit": "^0.12.1", 62 | "pdfkit": "^0.13.0", 63 | "test-vir": "^0.2.3", 64 | "virmator": "^1.3.7" 65 | }, 66 | "engines": { 67 | "node": ">=12", 68 | "npm": ">=7" 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/augments/date.ts: -------------------------------------------------------------------------------- 1 | import {createDateFromUtcIsoFormat} from 'augment-vir'; 2 | 
import {isSanitizerMode} from '../global';

/**
 * Creates a date object that falls between the two dates given. If only an end date is usable
 * (no start date, or both bounds share the same year), a date is created that is no later than
 * the given endDate.
 *
 * @param startDate Optional. The earlier (date wise) bound for creating the new Date object. If
 *   this is not provided, the month and day are placed in endDate's year, or in the year before
 *   it when that would land after endDate.
 * @param endDate Required. The later bound for creating the new Date object.
 * @param monthNumber Month number for the new Date object. This is 1 indexed, so a `1` here
 *   corresponds to January.
 * @param dayNumber Day of the month for the new Date object. This is 1 indexed; the first day of
 *   the month is `1`.
 * @returns The date built from the given month and day in whichever candidate year fits the
 *   bounds.
 * @throws Error when the bounds span different years and no candidate year places the date
 *   between them (unless sanitizer mode is on, in which case the start-year candidate is
 *   returned instead).
 */
export function dateWithinRange(
    startDate: Date | undefined,
    endDate: Date,
    monthNumber: number,
    dayNumber: number,
): Date {
    const errorString = JSON.stringify({
        startDate,
        endDate,
        monthNumber,
        dayNumber,
    });
    // zero-pad single digit values so that the ISO formatted string is valid
    const month = monthNumber < 10 ? `0${monthNumber}` : String(monthNumber);
    const day = dayNumber < 10 ? `0${dayNumber}` : String(dayNumber);
    const endYear = endDate.getUTCFullYear();

    if (!startDate || startDate.getUTCFullYear() === endYear) {
        const dateInEndYear = createDateFromUtcIsoFormat(`${endYear}-${month}-${day}`);
        if (dateInEndYear <= endDate) {
            return dateInEndYear;
        }
        // the month and day land after endDate in its own year: fall back to the previous year
        return createDateFromUtcIsoFormat(`${endYear - 1}-${month}-${day}`);
    }

    // from here on startDate is defined and is in a different year than endDate
    const startYear = startDate.getUTCFullYear();
    const dateFromStartYear = createDateFromUtcIsoFormat(`${startYear}-${month}-${day}`);
    const dateFromStartYearPlus = createDateFromUtcIsoFormat(`${startYear + 1}-${month}-${day}`);
    const dateFromEndYear = createDateFromUtcIsoFormat(`${endYear}-${month}-${day}`);

    // candidates are tried in this order; the first one inside [startDate, endDate] wins
    const candidates = [
        dateFromStartYear,
        dateFromEndYear,
        dateFromStartYearPlus,
    ];
    const fittingDate = candidates.find(
        (candidate) => startDate <= candidate && candidate <= endDate,
    );

    if (fittingDate) {
        return fittingDate;
    }
    if (isSanitizerMode()) {
        // in sanitizer mode an out-of-range date is returned instead of throwing
        return dateFromStartYear;
    }
    throw new Error(
        `Invalid potential dates generated, none fit between start and end: ${errorString}`,
    );
}
--------------------------------------------------------------------------------
/src/bash-scripting.ts:
--------------------------------------------------------------------------------
import {exec} from 'child_process';

/**
 * Runs the given command through bash and resolves with its output.
 *
 * @param command The command line to execute.
 * @param acceptStderr When false (the default), any stderr output rejects the promise with that
 *   output. When true, stderr output resolves the promise with the stderr text (not stdout).
 * @returns A promise of stdout when stderr is empty, or of stderr when it is non-empty and
 *   acceptStderr is true. Rejects with the exec error if the command itself fails.
 */
export async function runBashCommand(command: string, acceptStderr = false): Promise<string> {
    return new Promise<string>((resolve, reject) => {
        exec(command, {shell: 'bash'}, (error, stdout, stderr) => {
            if (error) {
                return reject(error);
            }
            if (!stderr) {
                return resolve(stdout);
            }
            return acceptStderr ? resolve(stderr) : reject(stderr);
        });
    });
}
--------------------------------------------------------------------------------
/src/global.ts:
--------------------------------------------------------------------------------
/** Module-level flag that is toggled by setSanitizerMode and unsetSanitizerMode. */
let sanitizerMode = false;

/** @returns Whether sanitizer mode is currently turned on. */
export function isSanitizerMode(): boolean {
    return sanitizerMode;
}

/**
 * Turns sanitizer mode on.
 *
 * @returns The value of the flag from before this call.
 */
export function setSanitizerMode(): boolean {
    const original = sanitizerMode;
    sanitizerMode = true;
    return original;
}

/**
 * Turns sanitizer mode off.
 *
 * @returns The value of the flag from before this call.
 */
export function unsetSanitizerMode(): boolean {
    const original = sanitizerMode;
    sanitizerMode = false;
    return original;
}
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
export * from './parser/all-parsers';
export * from './parser/parse-api';
export * from './parser/parsed-output';
export * from './parser/parser-function';
export * from './parser/parser-options';
--------------------------------------------------------------------------------
/src/package-contents.test.ts:
--------------------------------------------------------------------------------
import {safeMatch} from 'augment-vir';
import {testGroup} from 'test-vir';
import {runBashCommand} from './bash-scripting';

const packCommand = `npm pack --dry-run`;

// the trailing spaces are intentional: lines are compared by exact equality below
const startTrigger = 'npm notice === Tarball Contents === ';
const endTrigger = 'npm notice === Tarball Details === ';

/**
 * Runs the pack command and returns its output lines from the start trigger line through the end
 * trigger line, both inclusive. If a trigger line is missing, the corresponding edge of the whole
 * output is used instead.
 */
async function getRawPackFileList(): Promise<string[]> {
    // the pack output is read from stderr, hence acceptStderr = true
    const packOutput: string[] = (await runBashCommand(packCommand, true)).split('\n');

    const startTriggerIndex = packOutput.indexOf(startTrigger);
    const startIndex = startTriggerIndex > -1 ? startTriggerIndex : 0;
    const endTriggerIndex = packOutput.indexOf(endTrigger);
    const endIndex = endTriggerIndex > -1 ? endTriggerIndex : packOutput.length - 1;

    return packOutput.slice(startIndex, endIndex + 1);
}

// captures the file name that follows the size column of each file line
const fileLineRegExp = /npm notice [\d\.,]+\w+?B\s+(.+?)\s*$/;

/**
 * Extracts the trimmed file names listed between the two trigger lines of the pack output.
 *
 * @throws Error if any line between the triggers does not match fileLineRegExp.
 */
async function extractPackFiles(): Promise<string[]> {
    const raw = await getRawPackFileList();
    // drop the first and last lines, which are the trigger lines themselves
    const lines = raw.slice(1, raw.length - 1);

    return lines.map((line) => {
        const [, fileName] = safeMatch(line, fileLineRegExp);

        if (!fileName) {
            throw new Error(`Could not match npm pack file line "${line}" with ${fileLineRegExp}`);
        }
        return fileName.trim();
    });
}

testGroup(async (runTest) => {
    runTest({
        description: 'verify that we can read package contents without error',
        test: async () => {
            await runBashCommand(packCommand, true);
        },
    });

    runTest({
        description: 'pack file list has contents',
        expect: true,
        test: async () => {
            return !!(await getRawPackFileList()).length;
        },
    });

    runTest({
        description: 'pack file list includes terminator strings',
        expect: [startTrigger, endTrigger],
        test: async () => {
            const rawFiles = await getRawPackFileList();
            return [rawFiles[0], rawFiles[rawFiles.length - 1]];
        },
    });

    runTest({
        description: 'correct number of files extracted',
        // the raw list has exactly two extra lines: the start and end triggers
        expect: 2,
        test: async () => {
            const rawFiles = await getRawPackFileList();
            const packFiles = await extractPackFiles();
            return rawFiles.length - packFiles.length;
        },
    });

    runTest({
        description: 'extracted file names are trimmed',
        expect: await extractPackFiles(),
        test: async () => {
            const packFiles = await extractPackFiles();
            return packFiles.map((file) => file.trim());
        },
    });

    // NOTE(review): package.json's "main" points at dist/src/index.js while this list expects
    // dist/index.js; confirm which layout the compiled output actually uses.
    const importantFiles: string[] = [
        'LICENSE',
        'README.md',
        'package.json',
        'dist/index.js',
        'dist/index.d.ts',
    ];

    runTest({
        description: 'no important files should be missing',
        expect: [],
        test: async () => {
            const packFiles = await extractPackFiles();

            return importantFiles.filter((file) => !packFiles.includes(file));
        },
    });

    runTest({
        description: 'no other non-dist and non-important files are included',
        expect: [],
        test: async () => {
            const packFiles = await extractPackFiles();

            return packFiles.filter(
                (file) => !importantFiles.includes(file) && !file.startsWith(`dist/`),
            );
        },
    });

    // allow this intentionally misspelled word root
    // cSpell:ignore sanitiz
    const badFilePartialMatches: (string | RegExp)[] = [
        '.pdf',
        'sanitiz',
        'bash',
        /package.*\.ts/i,
        /readme.*\.ts/i,
        'paths',
        '.test.js',
        '.test.d.ts',
    ];

    runTest({
        description: 'no bad files are included',
        expect: [],
        test: async () => {
            const packFiles = await extractPackFiles();

            // a file is reported if any of the bad matchers hits it
            return packFiles.filter((file) => {
                return badFilePartialMatches.some((matcher) => {
                    return matcher instanceof RegExp ? matcher.test(file) : file.includes(matcher);
                });
            });
        },
    });

    runTest({
        description: 'all files in dist are either js or .d.ts files',
        expect: [],
        test: async () => {
            const packFiles = await extractPackFiles();

            return packFiles.filter((file) => {
                return (
                    file.startsWith(`dist/`) && !file.endsWith('.d.ts') && !file.endsWith('.js')
                );
            });
        },
    });

    runTest({
        description: 'no .js file should be missing a companion .d.ts file',
        expect: [],
        test: async () => {
            const packFiles = await extractPackFiles();
            const jsFiles = packFiles.filter((file) => file.endsWith('.js'));

            return jsFiles.filter((jsFile) => {
                return !packFiles.includes(jsFile.replace(/\.js$/, '.d.ts'));
            });
        },
    });
});
--------------------------------------------------------------------------------
/src/package-version.ts:
--------------------------------------------------------------------------------
import {readFileSync} from 'fs-extra';
import {packageJson} from './repo-paths';

// read once at module load; importing this module throws if no version is present
const packageVersion: string = JSON.parse(readFileSync(packageJson).toString()).version;
if (!packageVersion) {
    throw new Error(`Package version was not found.`);
}

/** @returns The "version" field that was read from the package.json file at module load. */
export function getPackageVersion(): string {
    return packageVersion;
}
--------------------------------------------------------------------------------
/src/parser/all-parsers.ts:
--------------------------------------------------------------------------------
import {getEnumTypedValues} from 'augment-vir';
import {
    ChaseCreditCardParsingOptions,
    chasePrimeVisaCreditCardParser,
} from './implemented-parsers/chase-prime-visa-credit-card-parser';
import {citiCostcoVisaCreditCardParser} from './implemented-parsers/citi-costco-visa-credit-card-parser';
import {PaypalOutput, paypalStatementParser} from './implemented-parsers/paypal-parser';
import {
    usaaBankAccountStatementParser,
    UsaaBankOutput,
} from './implemented-parsers/usaa-bank-account-parser';
import {
    usaaVisaCreditCardStatementParser,
    UsaaVisaCreditOutput,
} from './implemented-parsers/usaa-visa-credit-card-parser';
import {ParsedOutput} from './parsed-output';
import {BaseParserOptions, CombineWithBaseParserOptions} from './parser-options';

/** Identifier for each implemented statement parser. */
export enum ParserType {
    ChasePrimeVisaCredit = 'chase-prime-visa-credit',
    CitiCostcoVisaCredit = 'citi-costco-visa-credit',
    UsaaBank = 'usaa-bank',
    UsaaVisaCredit = 'usaa-visa-credit',
    Paypal = 'paypal',
}

/** Type guard: true only when x is a string equal to one of the ParserType enum values. */
export function isParserType(x: any): x is ParserType {
    return typeof x === 'string' && getEnumTypedValues(ParserType).includes(x as ParserType);
}

// NOTE(review): the generic type arguments in the two interfaces below are filled in from this
// file's imports; confirm them against the parser-options and parsed-output modules.

/** Options type accepted by each parser, keyed by parser type. */
export interface AllParserOptions extends Record<ParserType, Partial<BaseParserOptions>> {
    [ParserType.ChasePrimeVisaCredit]: Partial<
        CombineWithBaseParserOptions<ChaseCreditCardParsingOptions>
    >;
    [ParserType.CitiCostcoVisaCredit]: Partial<BaseParserOptions>;
    [ParserType.Paypal]: Partial<BaseParserOptions>;
    [ParserType.UsaaBank]: Partial<BaseParserOptions>;
    [ParserType.UsaaVisaCredit]: Partial<BaseParserOptions>;
}

/** Output type produced by each parser, keyed by parser type. */
export interface AllParserOutput extends Record<ParserType, ParsedOutput> {
    [ParserType.ChasePrimeVisaCredit]: ParsedOutput;
    [ParserType.CitiCostcoVisaCredit]: ParsedOutput;
    [ParserType.Paypal]: PaypalOutput;
    [ParserType.UsaaBank]: UsaaBankOutput;
    [ParserType.UsaaVisaCredit]: UsaaVisaCreditOutput;
}

/** Lookup from parser type to its parser implementation. */
export const parsers = {
    [ParserType.ChasePrimeVisaCredit]: chasePrimeVisaCreditCardParser,
    [ParserType.CitiCostcoVisaCredit]: citiCostcoVisaCreditCardParser,
    [ParserType.Paypal]: paypalStatementParser,
    [ParserType.UsaaBank]: usaaBankAccountStatementParser,
    [ParserType.UsaaVisaCredit]: usaaVisaCreditCardStatementParser,
} as const;
--------------------------------------------------------------------------------
/src/parser/implemented-parsers/chase-prime-visa-credit-card-parser.ts:
--------------------------------------------------------------------------------
import {createDateFromSlashFormat, safeMatch, stripCommasFromNumberString} from 'augment-vir';
import {dateWithinRange} from '../../augments/date';
import {ParsedOutput, ParsedTransaction} from '../parsed-output';
import {CombineWithBaseParserOptions} from '../parser-options';
import {createStatementParser} from '../statement-parser';

/** States of the Chase statement state machine, in the order a statement is expected to be read. */
enum State {
    Header = 'header',
    Payment = 'payment',
    Purchase = 'purchase',
    End = 'end',
}

/** Text markers that drive state transitions and header extraction. */
const ChaseParsingTriggers = {
    Payments: 'payments and other credits',
    Purchase: /^\s*purchase\s*$/i,
    Purchases: /^\s*purchases\s*$/i,
    Totals: 'totals year-to-date',
    AccountNumber: 'account number:',
    OpeningClosingDate: 'opening/closing date',
};

/**
 * Captures the trailing 1-4 digits of the account number line.
 *
 * The filler before the capture group is lazy (`.*?`). A greedy `.+` swallows every digit but the
 * last one, so the suffix would always be a single digit.
 */
const accountNumberRegExp = new RegExp(
    `${ChaseParsingTriggers.AccountNumber} .*?(\\d{1,4})$`,
    'i',
);

/** Captures the opening date (group 1) and closing date (group 2) of the statement period. */
const closingDateRegExp = new RegExp(
    `${ChaseParsingTriggers.OpeningClosingDate}\\s+(\\d{1,2}/\\d{1,2}/\\d{1,2})\\s+-\\s+(\\d{1,2}/\\d{1,2}/\\d{1,2})`,
    'i',
);

export type ChaseCreditCardParsingOptions = {
    // NOTE(review): nothing in this file reads this option; multi-line description text is
    // currently dropped by performStateAction regardless of its value.
    includeMultiLineDescriptions: boolean;
};

export const defaultChaseCreditCardParserOptions: Required<
    Readonly<ChaseCreditCardParsingOptions>
> = {
    includeMultiLineDescriptions: true,
};

export const chasePrimeVisaCreditCardParser = createStatementParser<
    State,
    ParsedOutput,
    ChaseCreditCardParsingOptions
>({
    action: performStateAction,
    next: nextState,
    initialState: State.Header,
    endState: State.End,
    defaultParserOptions: defaultChaseCreditCardParserOptions,
    parserKeywords: Object.values(ChaseParsingTriggers),
});

/**
 * Tries to read a single `MM/DD description amount` transaction line.
 *
 * @returns The parsed transaction, or the untouched line when it does not look like a transaction.
 */
function processTransactionLine(
    line: string,
    startDate: Date,
    endDate: Date,
): ParsedTransaction | string {
    const [, date, description, amount] = safeMatch(
        line,
        /^(\d{1,2}\/\d{1,2})\s+(\S.+?)\s+([\.\d,\-]+)$/,
    );
    if (date && description && amount) {
        const [month, day] = date.split('/');
        return {
            amount: Number(stripCommasFromNumberString(amount)),
            description,
            // the line only has month and day; the statement range supplies the rest
            date: dateWithinRange(startDate, endDate, Number(month), Number(day)),
            originalText: [line],
        };
    } else {
        return line;
    }
}

/**
 * Applies one line of statement text to the output for the current state.
 *
 * In the header state this reads the statement period and account suffix. In the payment and
 * purchase states it appends parsed transactions to incomes and expenses respectively.
 *
 * @throws If a transaction state is reached before the statement period was found.
 */
function performStateAction(
    currentState: State,
    line: string,
    output: ParsedOutput,
    parserOptions: CombineWithBaseParserOptions,
) {
    if (currentState === State.Header) {
        const [, startDateString, endDateString] = safeMatch(line, closingDateRegExp);
        const [, accountNumber] = safeMatch(line, accountNumberRegExp);

        if (startDateString && endDateString) {
            const startDate = createDateFromSlashFormat(startDateString, parserOptions.yearPrefix);
            const endDate = createDateFromSlashFormat(endDateString, parserOptions.yearPrefix);
            // Chase statements sometimes include transactions a few days outside of the statement range.
            startDate.setDate(startDate.getDate() - 3);
            endDate.setDate(endDate.getDate() + 3);
            output.startDate = startDate;
            output.endDate = endDate;
        } else if (accountNumber && !output.accountSuffix) {
            // only the first account number found is kept
            output.accountSuffix = accountNumber;
        }
    } else if (currentState === State.Payment || currentState === State.Purchase) {
        if (!output.endDate || !output.startDate) {
            throw new Error('Started reading transactions but got no start or end dates.');
        }

        const array = currentState === State.Payment ? output.incomes : output.expenses;

        const result = processTransactionLine(line, output.startDate, output.endDate);

        // a string result means the line was not a transaction; it is ignored
        if (typeof result !== 'string') {
            array.push(result);
        }
    }

    return output;
}

/** Picks the next state from the current state and the (lower-cased) line. */
function nextState(currentState: State, line: string): State {
    line = line.toLowerCase();

    switch (currentState) {
        case State.Header:
            if (line === ChaseParsingTriggers.Payments) {
                return State.Payment;
            } else if (
                line.match(ChaseParsingTriggers.Purchase) ||
                line.match(ChaseParsingTriggers.Purchases)
            ) {
                return State.Purchase;
            }
            break;
        case State.Payment:
            if (
                line.match(ChaseParsingTriggers.Purchase) ||
                line.match(ChaseParsingTriggers.Purchases)
            ) {
                return State.Purchase;
            }
            break;
        case State.Purchase:
            if (line.includes(ChaseParsingTriggers.Totals)) {
                return State.End;
            }
            break;
    }

    return currentState;
}

--------------------------------------------------------------------------------
/src/parser/implemented-parsers/citi-costco-visa-credit-card-parser.ts:
--------------------------------------------------------------------------------
import {
    collapseSpaces,
    createDateFromSlashFormat,
    getEnumTypedValues,
    Overwrite,
    safeMatch,
    stripCommasFromNumberString,
} from 'augment-vir';
import {parsePageItems} from 'pdf-text-reader';
import {TextItem} from 'pdfjs-dist/types/src/display/api';
import {dateWithinRange} from '../../augments/date';
import {getPdfDocument} from '../../pdf/read-pdf';
import {ParsedOutput, ParsedTransaction} from '../parsed-output';
import {CombineWithBaseParserOptions} from '../parser-options';
import {createStatementParser} from '../statement-parser';

enum State {
    Header = 'header',
    Payment = 'payment',
    Purchase = 'purchase',
    PurchaseFiller = 'purchase filler',
    End = 'end',
}

enum ParsingTriggers {
    BillingPeriod = 'billing period:',
    AccountNumber = 'account number ending in:',
    Payments = 'payments, credits and adjustments',
    Purchases = 'standard purchases',
    // compared against raw (not lower-cased) PDF text items, hence the capitalization
    AccountSummary = 'Account Summary',
}

/** Captures the billing period start (group 1) and end (group 2) dates. */
const billingPeriodRegExp = new RegExp(
    `^\\s*${ParsingTriggers.BillingPeriod}\\s+(\\d{1,2}/\\d{1,2}/\\d{1,2})-(\\d{1,2}/\\d{1,2}/\\d{1,2})\\s*$`,
    'i',
);

/** Captures the last whitespace-free token after the account number label. */
const accountNumberRegExp = new RegExp(`${ParsingTriggers.AccountNumber}\\s+(\\S+)\\s*$`, 'i');

/** A transaction whose amount may not have been read yet (it can arrive on a later line). */
type CitiCostcoVisaCreditIntermediateTransaction = Overwrite<
    ParsedTransaction,
    {
        amount: number | undefined;
    }
>;

// NOTE(review): explicit type arguments restored by analogy with the Chase parser's call;
// confirm against createStatementParser's declaration.
export const citiCostcoVisaCreditCardParser = createStatementParser<State, ParsedOutput>({
    action: performStateAction,
    next: nextState,
    initialState: State.Header,
    endState: State.End,
    parserKeywords: getEnumTypedValues(ParsingTriggers),
    pdfProcessing: readCitiCostcoVisaPdf,
    outputValidation: outputValidation,
});

/**
 * Reads the PDF at the given path into lines of text, one array of lines per page, with the
 * right-hand column removed.
 *
 * @throws If the "Account Summary" item that marks the right column is not on the first page.
 */
async function readCitiCostcoVisaPdf(path: string): Promise<string[][]> {
    const doc = await getPdfDocument(path);
    const pageCount = doc.numPages;

    const pages: string[][] = [];

    /**
     * The costco card has a right column with costco rewards information that totally screws up the
     * parsing of actual transactions and payments. Here, we find where that column is so that it
     * can be removed.
     */
    const firstPageItems = (await (await doc.getPage(1)).getTextContent()).items;
    const rightColumnItem = firstPageItems.find(
        (item): item is TextItem =>
            'str' in item && item.str === ParsingTriggers.AccountSummary,
    );
    if (!rightColumnItem) {
        throw new Error('Could not find right column.');
    }
    // transform[4] is compared as the item's horizontal position
    const columnX = Math.floor(rightColumnItem.transform[4]);

    for (let i = 0; i < pageCount; i++) {
        // pdf pages are 1-indexed
        const pageItems = (await (await doc.getPage(i + 1)).getTextContent()).items;
        const filteredItems = pageItems.filter((item): item is TextItem => {
            if (!('str' in item)) {
                return false;
            }
            // filter out the right column
            const beforeColumn = item.transform[4] < columnX;
            const justSpaces = item.str.match(/^\s+$/);
            return !justSpaces && beforeColumn;
        });
        pages.push(parsePageItems(filteredItems).lines);
    }

    return pages;
}

/**
 * Verifies that every transaction ended up with an amount.
 *
 * @throws If any income or expense still has an undefined amount.
 */
function outputValidation(output: ParsedOutput) {
    // Verifying that the "lineParse as ParsedTransaction" assumption below is true
    output.incomes.forEach((income) => {
        if (income.amount === undefined) {
            throw new Error(
                `Invalid amount for income transaction: ${JSON.stringify(income)}`,
            );
        }
    });
    output.expenses.forEach((expense) => {
        if (expense.amount === undefined) {
            throw new Error(
                `Invalid amount for expense transaction: ${JSON.stringify(expense)}`,
            );
        }
    });
}

const amountRegExp = /^-?\$([\d,\.]+)\s*$/i;

/**
 * Parses a `$1,234.56` or `-$1,234.56` string into a number.
 *
 * @param negate Flips the sign of the result; a leading "-" in the input flips it again.
 * @throws If the input is not a dollar amount.
 */
function parseAmount(input: string, negate: boolean): number {
    const [, amountMatch] = safeMatch(input, amountRegExp);

    if (amountMatch) {
        const amount = Number(stripCommasFromNumberString(amountMatch));
        let multiplier = negate ? -1 : 1;

        if (input[0] === '-') {
            multiplier *= -1;
        }

        return amount * multiplier;
    } else {
        throw new Error(`Failed to parse a dollar amount: "${input}"`);
    }
}

/**
 * Classifies one line inside the payments or purchases section.
 *
 * @returns A transaction (possibly without its amount yet) when the line starts a transaction, a
 *   number when the line is only a dollar amount, or the space-collapsed line otherwise.
 * @throws If the billing period has not been read yet.
 */
function parseTransactionLine(
    line: string,
    output: ParsedOutput,
    negate: boolean,
): string | number | CitiCostcoVisaCreditIntermediateTransaction {
    if (!output.startDate || !output.endDate) {
        throw new Error(
            `Tried to parse a transaction but no start date (${output.startDate}) or end date (${output.endDate}) were found yet`,
        );
    }

    const [, monthString, dayString, description, amountString] = safeMatch(
        line,
        /(?:\d{1,2}\/\d{1,2}\s*)?(\d{1,2})\/(\d{1,2})\s+(\S.+)\s+(-?\$[\d\.,]+)?\s*$/i,
    );

    if (description) {
        const transaction: CitiCostcoVisaCreditIntermediateTransaction = {
            date: dateWithinRange(
                output.startDate,
                output.endDate,
                Number(monthString),
                Number(dayString),
            ),
            amount: undefined,
            description: collapseSpaces(description),
            originalText: [line],
        };
        if (amountString) {
            transaction.amount = parseAmount(amountString, negate);
        }

        return transaction;
    } else {
        const amountMatch = line.match(amountRegExp);
        if (amountMatch) {
            return parseAmount(line, negate);
        } else {
            return collapseSpaces(line);
        }
    }
}

/**
 * Applies one line of statement text to the output for the current state.
 *
 * @throws If a new transaction starts while the previous one still has no amount.
 */
function performStateAction(
    currentState: State,
    line: string,
    output: ParsedOutput,
    parserOptions: CombineWithBaseParserOptions,
) {
    if (currentState === State.Header) {
        const [, startDateString, endDateString] = safeMatch(line, billingPeriodRegExp);
        const [, accountSuffixString] = safeMatch(line, accountNumberRegExp);
        if (startDateString && endDateString) {
            output.startDate = createDateFromSlashFormat(startDateString, parserOptions.yearPrefix);
            output.endDate = createDateFromSlashFormat(endDateString, parserOptions.yearPrefix);
        } else if (accountSuffixString) {
            output.accountSuffix = accountSuffixString;
        }
    } else if (line !== '' && (currentState === State.Purchase || currentState === State.Payment)) {
        const array = currentState === State.Purchase ? output.expenses : output.incomes;

        // payments are negated by parseAmount
        const lineParse = parseTransactionLine(line, output, currentState === State.Payment);
        const lastTransaction: CitiCostcoVisaCreditIntermediateTransaction | undefined =
            array[array.length - 1];

        if (typeof lineParse === 'string' && lastTransaction) {
            // continuation of the previous transaction's description
            lastTransaction.description += '\n' + lineParse;
            lastTransaction.originalText.push(line);
        } else if (typeof lineParse === 'number' && lastTransaction) {
            // amount that was printed on its own line
            lastTransaction.amount = lineParse;
            lastTransaction.originalText.push(line);
        } else {
            // because a transaction's amount may not be on its first line, we must make sure we actually got the amount
            // before moving onto the next transaction
            if (lastTransaction && lastTransaction.amount === undefined) {
                throw new Error(
                    [
                        "Moving onto next transaction but last one's amount is still undefined.",
                        `last transaction: ${JSON.stringify(lastTransaction)}`,
                        `current line: "${line}"`,
                    ].join('\n'),
                );
            }
            // This assumption is not always true! However, it should become true later.
            // It must be verified later that it indeed did come true.
            // NOTE(review): a string or number lineParse with no previous transaction also lands
            // here and is pushed as-is; outputValidation is the only guard against that.
            array.push(lineParse as ParsedTransaction);
        }
    }

    return output;
}

/** Picks the next state from the current state and the (lower-cased) line. */
function nextState(currentState: State, line: string): State {
    line = line.toLowerCase();

    switch (currentState) {
        case State.Header:
            if (line === ParsingTriggers.Payments) {
                return State.Payment;
            } else if (line === ParsingTriggers.Purchases) {
                return State.Purchase;
            }
            break;
        case State.Payment:
            if (line === '') {
                return State.PurchaseFiller;
            }
            break;
        case State.PurchaseFiller:
            if (line === ParsingTriggers.Purchases) {
                return State.Purchase;
            }
            break;
        case State.Purchase:
            if (line === '') {
                return State.End;
            }
            break;
    }

    return currentState;
}

--------------------------------------------------------------------------------
/src/parser/implemented-parsers/example-parser.ts:
--------------------------------------------------------------------------------
import {createDateFromUtcIsoFormat, safeMatch, stripCommasFromNumberString} from 'augment-vir';
import {ParsedOutput, ParsedTransaction} from '../parsed-output';
import {createStatementParser} from '../statement-parser';

enum State {
    Header = 'header',
    InnerState = 'inner-state',
    End = 'end',
}

// NOTE(review): explicit type arguments restored by analogy with the Chase parser's call;
// confirm against createStatementParser's declaration.
export const exampleStatementParser = createStatementParser<State, ParsedOutput>({
    action: performStateAction,
    next: nextState,
    initialState: State.Header,
    endState: State.End,
    parserKeywords: [],
});

/** Captures a `DD/DD` date (group 1), a description (group 2) and a `$` amount (group 3). */
const validPaymentRegExp = /(\d{2}\/\d{2})\s+(.+)\$([-,.\d]+)/;

/**
 * Reads a payment from a single line.
 *
 * @returns The parsed transaction, or undefined when the line is not a payment.
 */
function readPayment(line: string): ParsedTransaction | undefined {
    const [, dateString, descriptionString, amountString] = safeMatch(line, validPaymentRegExp);

    if (dateString && descriptionString && amountString) {
        return {
            amount: Number(stripCommasFromNumberString(amountString)),
            description: descriptionString,
            // NOTE(review): dateString is in slash format here but is handed to an ISO-format
            // helper; confirm this is intended for the example.
            date: createDateFromUtcIsoFormat(dateString),
            originalText: [line],
        };
    } else {
        return undefined;
    }
}

/** Adds the line to incomes when it is a payment read while in the inner state. */
function performStateAction(currentState: State, line: string, output: ParsedOutput) {
    if (currentState === State.InnerState && line.match(validPaymentRegExp)) {
        const transaction = readPayment(line);
        if (transaction) {
            output.incomes.push(transaction);
        }
    }

    return output;
}

/** Leaves the header immediately and ends on the line "end inner state". */
function nextState(currentState: State, line: string): State {
    line = line.toLowerCase();

    switch (currentState) {
        case State.Header:
            return State.InnerState;
        case State.InnerState:
            if (line === 'end inner state') {
                return State.End;
            }
            break;
        case State.End:
            break;
    }

    return currentState;
}

--------------------------------------------------------------------------------
/src/parser/implemented-parsers/paypal-parser.ts:
--------------------------------------------------------------------------------
import {
    collapseSpaces,
    createDateFromNamedCommaFormat,
    createDateFromSlashFormat,
    getEnumTypedValues,
    safeMatch,
    stripCommasFromNumberString,
} from 'augment-vir';
import {isSanitizerMode} from '../../global';
import {ParsedOutput, ParsedTransaction} from '../parsed-output';
import {createStatementParser} from '../statement-parser';

enum State {
    Header = 'header',
    HeaderData = 'header-data',
    Activity = 'activity',
    ExpenseInside = 'expense-inside',
    IncomeInside = 'income-inside',
    ActivityHeader = 'activity-header',
    End = 'end',
}

enum ParsingTriggers {
    Usd = 'USD',
    MustNotify = 'you must notify us no later than',
    Statement = 'statement period',
}

const pageEndRegExp = /^page\s+\d+$/i;
const activityHeader = /date\s+description\s+currency\s+amount\s+fees\s+total/i;
/** Captures the statement start date, end date and the remaining text (used as account id). */
const headerDataLineRegExp =
    /(\w{1,3} \d{1,2},? \d{1,4})\s*-\s*(\w{1,3} \d{1,2},? \d{1,4})\s*(.+)$/;
/** Captures date, description, amount, fees and total (groups 1-5) of a transaction's first line. */
const transactionStartRegExp = new RegExp(
    `^(\\d{1,2}/\\d{1,2}/\\d{1,4})\\s+(.+?)${ParsingTriggers.Usd}\\s+([-,.\\d]+)\\s+([-,.\\d]+)\\s+([-,.\\d]+)$`,
    'i',
);

export type PaypalTransaction = ParsedTransaction & {
    baseAmount: number;
    fees: number;
};

// NOTE(review): the type argument was lost in this copy of the file; restored so that the
// alias carries PaypalTransaction. Confirm against ParsedOutput's declaration.
export type PaypalOutput = ParsedOutput<PaypalTransaction>;

// NOTE(review): explicit type arguments restored by analogy with the Chase parser's call;
// confirm against createStatementParser's declaration.
export const paypalStatementParser = createStatementParser<State, PaypalOutput>({
    action: performStateAction,
    next: nextState,
    initialState: State.Header,
    endState: State.End,
    parserKeywords: [...getEnumTypedValues(ParsingTriggers), activityHeader, pageEndRegExp],
});

/**
 * Applies one line of statement text to the output for the current state.
 *
 * Header data fills in the statement period and account id, activity lines start a new
 * transaction, and the "inside" states append follow-up lines to the latest transaction.
 */
function performStateAction(currentState: State, line: string, output: PaypalOutput) {
    const lastExpense = output.expenses[output.expenses.length - 1];
    const lastIncome = output.incomes[output.incomes.length - 1];

    if (currentState === State.HeaderData && !output.startDate) {
        const [, startDate, endDate, accountId] = safeMatch(line, headerDataLineRegExp);
        if (startDate && endDate && accountId) {
            output.startDate = createDateFromNamedCommaFormat(startDate, isSanitizerMode());
            output.endDate = createDateFromNamedCommaFormat(endDate, isSanitizerMode());
            output.accountSuffix = accountId;
        }
    } else if (currentState === State.Activity) {
        const [, date, description, amountString, fees, total] = safeMatch(
            line,
            transactionStartRegExp,
        );
        if (date && description && amountString && fees && total) {
            const amount = Number(stripCommasFromNumberString(amountString));
            const newTransaction: PaypalTransaction = {
                date: createDateFromSlashFormat(date),
                description: collapseSpaces(description),
                // this assumption that we can always use absolute value here may be wrong
                amount: Math.abs(Number(stripCommasFromNumberString(total))),
                fees: Math.abs(Number(stripCommasFromNumberString(fees))),
                baseAmount: Math.abs(amount),
                originalText: [line],
            };
            // NOTE(review): the array is chosen by the sign of the base amount, while nextState
            // chooses ExpenseInside/IncomeInside by the sign of the total; these could disagree.
            const array = amount < 0 ? output.expenses : output.incomes;

            array.push(newTransaction);
        }
    } else if (currentState === State.ExpenseInside && line !== '' && lastExpense) {
        lastExpense.description += '\n' + collapseSpaces(line);
        lastExpense.originalText.push(line);
    } else if (currentState === State.IncomeInside && line !== '' && lastIncome) {
        lastIncome.description += '\n' + collapseSpaces(line);
        lastIncome.originalText.push(line);
    }

    return output;
}

/** Picks the next state from the current state and the (lower-cased) line. */
function nextState(currentState: State, line: string): State {
    line = line.toLowerCase();

    // this trigger ends parsing from any state
    if (line.includes(ParsingTriggers.MustNotify)) {
        return State.End;
    }

    switch (currentState) {
        case State.Header:
            if (line.includes(ParsingTriggers.Statement)) {
                return State.HeaderData;
            } else if (line.match(activityHeader)) {
                return State.ActivityHeader;
            }
            break;
        case State.ActivityHeader:
            if (line === '') {
                return State.Activity;
            }
            break;
        case State.HeaderData:
            return State.Header;
        case State.ExpenseInside:
            if (line === '') {
                return State.Activity;
            }
            break;
        case State.IncomeInside:
            if (line === '') {
                return State.Activity;
            }
            break;
        case State.Activity: {
            // braces keep this declaration scoped to its own case clause
            const totalMatch = safeMatch(line, transactionStartRegExp)[5];
            if (totalMatch) {
                if (Number(stripCommasFromNumberString(totalMatch)) < 0) {
                    return State.ExpenseInside;
                } else {
                    return State.IncomeInside;
                }
            } else if (line.match(pageEndRegExp)) {
                return State.Header;
            }
            break;
        }
        case State.End:
            break;
    }

    return currentState;
}

-------------------------------------------------------------------------------- /src/parser/implemented-parsers/usaa-bank-account-parser.ts: -------------------------------------------------------------------------------- 1 | import { 2 | collapseSpaces, 3 | createDateFromSlashFormat, 4 | getEnumTypedValues, 5 | safeMatch, 6 | stripCommasFromNumberString, 7 | } from 'augment-vir'; 8 | import {dateWithinRange} from '../../augments/date'; 9 | import {ParsedOutput, ParsedTransaction} from '../parsed-output'; 10 | import {CombineWithBaseParserOptions} from '../parser-options'; 11 | import {createStatementParser} from '../statement-parser'; 12 | 13 | enum State { 14 | PageHeader = 'header', 15 | StatementPeriod = 'statement-period', 16 | DepositHeaders = 'deposit-headers', 17 | Deposit = 'deposit', 18 | DebitHeaders = 'debit-headers', 19 | Debit = 'debit', 20 | Filler = 'filler', 21 | End = 'end', 22 | } 23 | 24 | enum ParsingTriggers { 25 | OtherDebits = 'other debits', 26 | } 27 | 28 | const otherDebitsRegExp = /^\s+other debits$/; 29 | const accountSummaryRegExp = /^\s+account balance summary$/i; 30 | const accountNumberHeaderRegExp = /account number\s+account type\s+statement period/i; 31 | const depositsRegExp = /^\s+deposits and other credits$/i; 32 | const fromRegExp = /^\s{2,}FROM\s+/; 33 | 34 | export type UsaaBankAccountTransaction = ParsedTransaction & { 35 | from: undefined | string; 36 | }; 37 | 38 | export type UsaaBankOutput = ParsedOutput; 39 | 40 | export const usaaBankAccountStatementParser = createStatementParser({ 41 | action: performStateAction, 42 | next: nextState, 43 | initialState: State.PageHeader, 44 | endState: State.End, 45 | parserKeywords: [ 46 | ...getEnumTypedValues(ParsingTriggers), 47 | otherDebitsRegExp, 48 | accountSummaryRegExp, 49 | accountNumberHeaderRegExp, 50 | depositsRegExp, 51 | fromRegExp, 52 | ], 53 | }); 54 | 55 | const validTransactionLineRegex = /(?:^\d{1,2}\/\d{1,2}\s+|^\s{2,})/; 56 | 57 | function performStateAction( 
58 | currentState: State, 59 | line: string, 60 | output: UsaaBankOutput, 61 | parserOptions: CombineWithBaseParserOptions, 62 | ) { 63 | if (currentState === State.StatementPeriod && line !== '') { 64 | const [, accountSuffix, startDateString, endDateString] = safeMatch( 65 | line, 66 | /([\d-]+)\s+.+?(\d{1,2}\/\d{1,2}\/\d{1,2}).+?(\d{1,2}\/\d{1,2}\/\d{1,2})/, 67 | ); 68 | if (accountSuffix && startDateString && endDateString) { 69 | output.accountSuffix = accountSuffix.replace(/-/g, '').slice(-4); 70 | if (!output.accountSuffix.match(/\d+/)) { 71 | throw new Error(`Invalid account suffix: "${output.accountSuffix}"`); 72 | } 73 | output.startDate = createDateFromSlashFormat(startDateString, parserOptions.yearPrefix); 74 | output.endDate = createDateFromSlashFormat(endDateString, parserOptions.yearPrefix); 75 | } else { 76 | throw new Error( 77 | `Start and end date were not found in line for "${State.StatementPeriod}" state: "${line}"`, 78 | ); 79 | } 80 | } else if ( 81 | (currentState === State.Debit || currentState === State.Deposit) && 82 | line.match(validTransactionLineRegex) 83 | ) { 84 | const array = currentState === State.Debit ? 
output.expenses : output.incomes; 85 | 86 | const [, dateString, amountString, descriptionString] = safeMatch( 87 | line, 88 | /^(\d{1,2}\/\d{1,2})\s+((?:\d+|,|\.)+)\s+(.*)$/, 89 | ); 90 | const currentDebit = array[array.length - 1]; 91 | if (dateString && amountString && descriptionString) { 92 | if (!output.startDate || !output.endDate) { 93 | throw new Error( 94 | `Missing start/end date: ${JSON.stringify({ 95 | start: output.startDate, 96 | end: output.endDate, 97 | })}`, 98 | ); 99 | } 100 | // start line of debit 101 | const parts = dateString.split('/'); 102 | const date = dateWithinRange( 103 | output.startDate, 104 | output.endDate, 105 | Number(parts[0]), 106 | Number(parts[1]), 107 | ); 108 | array.push({ 109 | date: date, 110 | amount: Number(stripCommasFromNumberString(amountString)), 111 | description: collapseSpaces(descriptionString).trim(), 112 | from: undefined, 113 | originalText: [line], 114 | }); 115 | } else if (currentDebit) { 116 | /* 117 | * Assume that the current line is the last line for the current debit. 118 | * "from" is always the last line, so shift the current "from" to "method" since it wasn't the last line. 
119 | */ 120 | currentDebit.description += '\n' + collapseSpaces(line).trim(); 121 | if (line.match(fromRegExp)) { 122 | currentDebit.from = collapseSpaces(line.replace(fromRegExp, '')).trim(); 123 | } 124 | currentDebit.originalText.push(line); 125 | } 126 | } 127 | return output; 128 | } 129 | 130 | function nextState(currentState: State, line: string): State { 131 | line = line.toLowerCase(); 132 | 133 | if (line.match(accountSummaryRegExp)) { 134 | return State.End; 135 | } 136 | 137 | switch (currentState) { 138 | case State.PageHeader: 139 | if (line.match(accountNumberHeaderRegExp)) { 140 | return State.StatementPeriod; 141 | } else if (line.match(depositsRegExp)) { 142 | return State.DepositHeaders; 143 | } 144 | break; 145 | case State.StatementPeriod: 146 | if (line !== '') { 147 | return State.PageHeader; 148 | } 149 | break; 150 | case State.DepositHeaders: 151 | return State.Deposit; 152 | case State.Deposit: 153 | if (line === '') { 154 | return State.Filler; 155 | } else if (line.match(otherDebitsRegExp)) { 156 | return State.DebitHeaders; 157 | } 158 | break; 159 | case State.DebitHeaders: 160 | return State.Debit; 161 | case State.Debit: 162 | if (line === '') { 163 | return State.Filler; 164 | } 165 | break; 166 | case State.Filler: 167 | if (line.match(otherDebitsRegExp)) { 168 | return State.DebitHeaders; 169 | } 170 | break; 171 | case State.End: 172 | break; 173 | } 174 | 175 | return currentState; 176 | } 177 | -------------------------------------------------------------------------------- /src/parser/implemented-parsers/usaa-visa-credit-card-parser.ts: -------------------------------------------------------------------------------- 1 | import {createDateFromSlashFormat, safeMatch, stripCommasFromNumberString} from 'augment-vir'; 2 | import {dateWithinRange} from '../../augments/date'; 3 | import {ParsedOutput, ParsedTransaction} from '../parsed-output'; 4 | import {CombineWithBaseParserOptions} from '../parser-options'; 5 | import 
{createStatementParser} from '../statement-parser'; 6 | 7 | enum State { 8 | Header = 'header', 9 | PaymentHeader = 'payment-header', 10 | Payment = 'payment', 11 | PaymentFiller = 'payment-filler', 12 | CreditHeader = 'credit-header', 13 | Credit = 'credit', 14 | CreditFiller = 'credit-filler', 15 | CreditStartedFiller = 'credit-started-filler', 16 | End = 'end', 17 | } 18 | 19 | const PreserveKeywords = { 20 | TotalPayments: 'total payments and credits for this period', 21 | AccountNumber: /^Account Number\s+/, 22 | Payments: /^\s*payments and credits\s*$/i, 23 | TransactionsContinued: /^\s*transactions \(continued\)\s*$/i, 24 | Transactions: /^\s*transactions\s*$/i, 25 | StatementClosingDate: 'statement closing date', 26 | }; 27 | 28 | export type UsaaVisaCreditCardTransaction = ParsedTransaction & { 29 | postDate: Date; 30 | referenceNumber: string; 31 | }; 32 | 33 | export type UsaaVisaCreditOutput = ParsedOutput; 34 | 35 | const tableHeadersRegExp = /^trans date\s*post date/i; 36 | const creditsEndRegExp = /^\s*total transactions for/i; 37 | const paymentsEndRegExp = new RegExp(`(?:^${PreserveKeywords.TotalPayments}\\s+\\$)|(?:^$)`, 'i'); 38 | const extractAccountNumberRegExp = new RegExp( 39 | `${PreserveKeywords.AccountNumber.source}.+(\\d{1,4})$`, 40 | 'i', 41 | ); 42 | const closingDateRegExp = new RegExp( 43 | `${PreserveKeywords.StatementClosingDate}\\s+(\\d{1,2}/\\d{1,2}/\\d{1,2})`, 44 | 'i', 45 | ); 46 | const feesRegExp = /^\s*fees\s*$/i; 47 | 48 | export const usaaVisaCreditCardStatementParser = createStatementParser( 49 | { 50 | action: performStateAction, 51 | next: nextState, 52 | initialState: State.Header, 53 | endState: State.End, 54 | parserKeywords: [ 55 | // most of the RegExps are not included here because they capture sensitive information 56 | ...Object.values(PreserveKeywords), 57 | tableHeadersRegExp, 58 | creditsEndRegExp, 59 | feesRegExp, 60 | ], 61 | }, 62 | ); 63 | 64 | const transactionRegExp = 65 | 
/^(\d{1,2}\/\d{1,2})\s+(\d{1,2}\/\d{1,2})\s+(\S.*?)\s+?(\S.*?)\s+\$((?:\d+|,|\.)+)\-?$/; 66 | 67 | function processTransactionLine( 68 | line: string, 69 | endDate: Date, 70 | ): UsaaVisaCreditCardTransaction | string { 71 | const [, transactionDate, postDate, referenceNumber, description, amount] = safeMatch( 72 | line, 73 | transactionRegExp, 74 | ); 75 | if (transactionDate && postDate && referenceNumber && description && amount) { 76 | const [transactionMonth, transactionDay] = transactionDate.split('/'); 77 | const [postMonth, postDay] = postDate.split('/'); 78 | return { 79 | date: dateWithinRange( 80 | undefined, 81 | endDate, 82 | Number(transactionMonth), 83 | Number(transactionDay), 84 | ), 85 | postDate: dateWithinRange(undefined, endDate, Number(postMonth), Number(postDay)), 86 | amount: Number(stripCommasFromNumberString(amount)), 87 | description, 88 | referenceNumber, 89 | originalText: [line], 90 | }; 91 | } else { 92 | return line; 93 | } 94 | } 95 | 96 | function performStateAction( 97 | currentState: State, 98 | line: string, 99 | output: UsaaVisaCreditOutput, 100 | parserOptions: CombineWithBaseParserOptions, 101 | ) { 102 | if ( 103 | (currentState === State.Credit && !line.match(creditsEndRegExp)) || 104 | (currentState === State.Payment && !line.match(paymentsEndRegExp)) || 105 | // read expenses if in this state and the line matches a transaction 106 | (currentState === State.CreditStartedFiller && line.match(transactionRegExp)) 107 | ) { 108 | if (!output.endDate) { 109 | throw new Error('Started reading transactions but got no statement close date.'); 110 | } 111 | // Critical ternary here that sets the array to expenses even if the above State.CREDIT_STARTED_FILLER condition 112 | // is true 113 | const array = currentState === State.Payment ? 
output.incomes : output.expenses;

        const result = processTransactionLine(line, output.endDate);

        if (typeof result === 'string') {
            // the line is not a transaction: treat non-empty text as a continuation of the
            // previous transaction's description
            const lastTransaction = array[array.length - 1];
            if (result && lastTransaction) {
                lastTransaction.description += '\n' + result;
                lastTransaction.originalText.push(line);
            }
        } else {
            array.push(result);
        }
    } else if (currentState === State.Header) {
        const [, closingDateString] = safeMatch(line, closingDateRegExp);
        const [, accountNumberString] = safeMatch(line, extractAccountNumberRegExp);
        if (closingDateString) {
            output.endDate = createDateFromSlashFormat(closingDateString, parserOptions.yearPrefix);
        } else if (accountNumberString && !output.accountSuffix) {
            // only the first account number found is kept
            output.accountSuffix = accountNumberString;
        }
    }

    return output;
}

/**
 * Calculates the state to move into after reading the given line.
 *
 * The line is lower-cased before it is matched. When no transition applies, the current state is
 * returned unchanged.
 */
function nextState(currentState: State, line: string): State {
    line = line.toLowerCase();

    switch (currentState) {
        case State.Header:
            if (line.match(PreserveKeywords.Payments)) {
                return State.PaymentHeader;
            }
            break;
        case State.PaymentHeader:
            if (line.match(tableHeadersRegExp)) {
                return State.Payment;
            }
            break;
        case State.Payment:
            // use this regex here so that it can be shared with performStateAction
            if (line.match(paymentsEndRegExp)) {
                if (line === '') {
                    return State.PaymentFiller;
                } else {
                    return State.CreditFiller;
                }
            }
            break;
        case State.PaymentFiller:
            if (line.match(PreserveKeywords.TransactionsContinued)) {
                return State.Payment;
            }
            break;
        case State.CreditFiller:
            if (line.match(PreserveKeywords.Transactions)) {
                return State.CreditHeader;
            } else if (line.match(feesRegExp)) {
                return State.End;
            }
            break;
        case State.CreditStartedFiller:
            if (
                line.match(PreserveKeywords.TransactionsContinued) ||
                line.match(transactionRegExp)
            ) {
                return State.Credit;
            }
            break;
        case State.CreditHeader:
            if (line.match(tableHeadersRegExp)) {
                return State.Credit;
            }
            break;
        case State.Credit:
            if (line.match(creditsEndRegExp)) {
                return State.CreditFiller;
            } else if (line === '') {
                return State.CreditStartedFiller;
            }
            // explicit break: this case used to fall through into State.End
            break;
        case State.End:
            break;
    }

    return currentState;
}
-------------------------------------------------------------------------------- /src/parser/parse-api.ts: --------------------------------------------------------------------------------
import {AllParserOptions, parsers, ParserType} from './all-parsers';
import {ParsedOutput} from './parsed-output';
import {ParsePdfFunctionInput} from './parser-function';

export type StatementPdf =
    | {
          parserInput: ParsePdfFunctionInput;
          /** Type is always required. Without it, the package doesn't know which parser to use. */
          type: ParserType.ChasePrimeVisaCredit;
      }
    | {
          parserInput: ParsePdfFunctionInput;
          /** Type is always required. Without it, the package doesn't know which parser to use. */
          type: ParserType.CitiCostcoVisaCredit;
      }
    | {
          parserInput: ParsePdfFunctionInput;
          /** Type is always required. Without it, the package doesn't know which parser to use. */
          type: ParserType.UsaaBank;
      }
    | {
          parserInput: ParsePdfFunctionInput;
          /** Type is always required. Without it, the package doesn't know which parser to use. */
          type: ParserType.UsaaVisaCredit;
      }
    | {
          parserInput: ParsePdfFunctionInput;
          /** Type is always required. Without it, the package doesn't know which parser to use.
*/ 29 | type: ParserType.Paypal; 30 | }; 31 | 32 | export type ParsedPdf = Readonly< 33 | StatementPdf & { 34 | data: ParsedOutput; 35 | } 36 | >; 37 | 38 | export async function parsePdfs( 39 | pdfs: StatementPdf[], 40 | debug = false, 41 | ): Promise[]>> { 42 | const parsedPdfs: Readonly[]> = await Promise.all( 43 | pdfs.map(async (pdf) => { 44 | return { 45 | ...pdf, 46 | data: await parsers[pdf.type].parsePdf({ 47 | debug, 48 | ...pdf.parserInput, 49 | }), 50 | }; 51 | }), 52 | ); 53 | 54 | return parsedPdfs; 55 | } 56 | -------------------------------------------------------------------------------- /src/parser/parsed-output.ts: -------------------------------------------------------------------------------- 1 | export type ParsedTransaction = { 2 | date: Date; 3 | amount: number; 4 | description: string; 5 | originalText: string[]; 6 | }; 7 | 8 | export type ParsedOutput = { 9 | /** 10 | * For credit cards, an "income" transaction is a payment on the credit card. For bank accounts 11 | * or debit cards, an "income" is a deposit. 12 | */ 13 | incomes: T[]; 14 | /** 15 | * For credit cards, an "expense" is a purchase or credit charge. For bank accounts or debit 16 | * cards, an "expense" is a withdrawal or debit charge. 
17 | */ 18 | expenses: T[]; 19 | accountSuffix: string; 20 | name: string | undefined; 21 | /** 22 | * YearPrefix is the first two digits of the current year accountSuffix is the last digits of 23 | * the account number (this is usually 4 digits long) 24 | */ 25 | yearPrefix: number; 26 | startDate?: Date | undefined; 27 | endDate?: Date | undefined; 28 | }; 29 | 30 | export type InitOutput = Partial< 31 | Omit 32 | >; 33 | -------------------------------------------------------------------------------- /src/parser/parser-function.ts: -------------------------------------------------------------------------------- 1 | import {ParsedOutput} from './parsed-output'; 2 | import {CombineWithBaseParserOptions} from './parser-options'; 3 | 4 | export type SharedParserFunctionInputs = { 5 | /** 6 | * Optional debug property to see LOTS of output which shows the internal state machine 7 | * progressing over each line of the file. 8 | */ 9 | debug?: boolean | undefined; 10 | /** 11 | * Optional input that provides additional parser configuration. Each parser type has slightly 12 | * different parser options. 13 | */ 14 | parserOptions?: Partial> | undefined; 15 | /** 16 | * Optional name property to help identify the pdf if any errors occur. (By default file paths 17 | * will be used in errors so this is only for human readability if desired.) 18 | */ 19 | name?: string | undefined; 20 | }; 21 | 22 | /** Parse PDF files directly. */ 23 | 24 | export type ParsePdfFunctionInput = { 25 | /** FilePath is always required. What would the parser do without it? */ 26 | filePath: string; 27 | } & SharedParserFunctionInputs; 28 | 29 | export type ParsePdfFunction< 30 | OutputType extends ParsedOutput, 31 | ParserOptions extends object | undefined = undefined, 32 | > = (input: Readonly>) => Promise>; 33 | 34 | /** Parse text directly. 
*/ 35 | 36 | export type ParseTextFunctionInput = { 37 | textLines: string[]; 38 | } & SharedParserFunctionInputs; 39 | 40 | export type ParseTextFunction< 41 | OutputType extends ParsedOutput, 42 | ParserOptions extends object | undefined = undefined, 43 | > = (input: Readonly>) => Readonly; 44 | -------------------------------------------------------------------------------- /src/parser/parser-options.ts: -------------------------------------------------------------------------------- 1 | export type BaseParserOptions = { 2 | /** 3 | * YearPrefix Most statements don't include the full year so we must pass in the first two 4 | * numbers of the year so we know what millennium we're in. Example: for the year 2010, use 20. 5 | * For 1991, use 19. 6 | */ 7 | yearPrefix: number; 8 | }; 9 | 10 | export const defaultBaseParserOptions: Required> = { 11 | /** Every parser includes this option. See Year prefix section in the README for details. */ 12 | yearPrefix: 20, 13 | } as const; 14 | 15 | export type CombineWithBaseParserOptions = 16 | Required>; 17 | 18 | export function collapseDefaultParserOptions( 19 | inputDefaultParserOptions?: Required>, 20 | ): CombineWithBaseParserOptions { 21 | return { 22 | ...defaultBaseParserOptions, 23 | ...(inputDefaultParserOptions || {}), 24 | } as CombineWithBaseParserOptions; 25 | } 26 | 27 | export type ParserKeyword = string | RegExp; 28 | -------------------------------------------------------------------------------- /src/parser/parser-state-machine.ts: -------------------------------------------------------------------------------- 1 | import {IfEquals} from 'augment-vir'; 2 | import { 3 | createStateMachine, 4 | handleErrorFunction, 5 | nextStateFunction, 6 | performStateActionFunction, 7 | } from 'fsm-vir'; 8 | import {InitOutput, ParsedOutput} from './parsed-output'; 9 | import {SharedParserFunctionInputs} from './parser-function'; 10 | import { 11 | BaseParserOptions, 12 | collapseDefaultParserOptions, 13 | 
CombineWithBaseParserOptions, 14 | } from './parser-options'; 15 | 16 | export type performParseActionFunction< 17 | StateType, 18 | OutputType extends ParsedOutput, 19 | ParserOptions extends object | undefined = undefined, 20 | > = ( 21 | currentState: StateType, 22 | input: string, 23 | lastOutput: OutputType, 24 | parserOptions: CombineWithBaseParserOptions, 25 | ) => OutputType; 26 | 27 | export type nextParseStateFunction< 28 | StateType, 29 | ParserOptions extends object | undefined = undefined, 30 | > = ( 31 | currentState: StateType, 32 | input: string, 33 | parserOptions: CombineWithBaseParserOptions, 34 | ) => StateType; 35 | 36 | export type ParserInitInput< 37 | StateType, 38 | OutputType extends ParsedOutput, 39 | ParserOptions extends object | undefined = undefined, 40 | > = { 41 | action: performParseActionFunction; 42 | next: nextParseStateFunction; 43 | endState: StateType; 44 | initialState: StateType; 45 | initOutput?: Readonly>; 46 | defaultParserOptions?: IfEquals< 47 | ParserOptions, 48 | BaseParserOptions, 49 | undefined, 50 | Readonly> 51 | >; 52 | }; 53 | 54 | export type CreateStateMachineInput< 55 | StateType, 56 | OutputType extends ParsedOutput, 57 | ParserOptions extends object | undefined = undefined, 58 | > = ParserInitInput & 59 | SharedParserFunctionInputs; 60 | 61 | export type StateMachineParserFunction = ( 62 | inputs: Readonly, 63 | ) => Readonly; 64 | 65 | /** 66 | * This creates a state machine. The state machine is a Mealy machine but outputs are generated 67 | * independent of the state transition. As you can see in the arguments, the "action" function 68 | * (which generates outputs) is distinct from the "next" function, which calculates the next state. 69 | * The implementation of "action" is of course left to you though, so you can totally just ignore 70 | * the current value and make this a Moore machine. 
71 | */ 72 | export function createParserStateMachine< 73 | StateType, 74 | OutputType extends ParsedOutput, 75 | ParserOptions extends object | undefined = undefined, 76 | >({ 77 | action, 78 | next, 79 | initialState, 80 | endState, 81 | name, 82 | initOutput, 83 | parserOptions: inputParserOptions, 84 | defaultParserOptions, 85 | debug = false, 86 | }: Readonly< 87 | CreateStateMachineInput 88 | >): StateMachineParserFunction { 89 | const handleError: handleErrorFunction = (error) => { 90 | const errorName = name ?? `${String(error.currentValue?.[0]).substring(0, 10)}...`; 91 | const printError = error.stack ?? error.message; 92 | throw new Error(`Error parsing ${errorName} at "${error.currentValue}": ${printError}`); 93 | }; 94 | const defaultOptions: CombineWithBaseParserOptions = 95 | collapseDefaultParserOptions(defaultParserOptions); 96 | 97 | const parserOptions: CombineWithBaseParserOptions = { 98 | ...defaultOptions, 99 | ...(inputParserOptions ?? {}), 100 | }; 101 | 102 | const baseOutput: ParsedOutput = { 103 | incomes: [], 104 | expenses: [], 105 | name, 106 | yearPrefix: parserOptions.yearPrefix, 107 | accountSuffix: '', 108 | endDate: undefined, 109 | startDate: undefined, 110 | }; 111 | 112 | const startingOutput: Readonly = { 113 | ...baseOutput, 114 | ...(JSON.parse(JSON.stringify(initOutput || {})) as InitOutput), 115 | } as OutputType; 116 | 117 | const performStateAction: performStateActionFunction = ( 118 | currentState, 119 | input, 120 | lastOutput, 121 | ) => { 122 | return action(currentState, input, lastOutput, parserOptions); 123 | }; 124 | const calculateNextState: nextStateFunction = (currentState, input) => { 125 | return next(currentState, input, parserOptions); 126 | }; 127 | 128 | const stateMachine = createStateMachine({ 129 | performStateAction, 130 | calculateNextState, 131 | initialState, 132 | endState, 133 | handleError, 134 | initialOutput: startingOutput, 135 | }); 136 | 137 | return (inputs: Readonly): Readonly => { 138 | 
const machineResult = stateMachine.runMachine(inputs); 139 | 140 | if (debug) { 141 | machineResult.logs.forEach((log) => { 142 | console.log(log); 143 | }); 144 | } 145 | 146 | if (machineResult.aborted) { 147 | if (debug) { 148 | machineResult.errors.forEach((error) => { 149 | console.error(error); 150 | }); 151 | } 152 | throw new Error(machineResult.errors.join('\n')); 153 | } 154 | 155 | return machineResult.output; 156 | }; 157 | } 158 | -------------------------------------------------------------------------------- /src/parser/statement-parser.ts: -------------------------------------------------------------------------------- 1 | import {flatten2dArray} from 'augment-vir'; 2 | import {readPdf} from '../pdf/read-pdf'; 3 | import {ParsedOutput} from './parsed-output'; 4 | import { 5 | ParsePdfFunction, 6 | ParsePdfFunctionInput, 7 | ParseTextFunction, 8 | ParseTextFunctionInput, 9 | } from './parser-function'; 10 | import {ParserKeyword} from './parser-options'; 11 | import { 12 | createParserStateMachine, 13 | CreateStateMachineInput, 14 | ParserInitInput, 15 | } from './parser-state-machine'; 16 | 17 | export type ConvertPdfToTextFunction = (filePath: string) => Promise; 18 | 19 | export type StatementParser< 20 | OutputType extends ParsedOutput, 21 | ParserOptions extends object | undefined = undefined, 22 | > = { 23 | parsePdf: ParsePdfFunction; 24 | parseText: ParseTextFunction; 25 | convertPdfToText: (filePath: string) => Promise; 26 | parserKeywords: ParserKeyword[]; 27 | }; 28 | 29 | export type CreateStatementParserInput< 30 | StateType, 31 | OutputType extends ParsedOutput, 32 | ParserOptions extends object | undefined = undefined, 33 | > = { 34 | pdfProcessing?: (filePath: string) => Promise | string[][]; 35 | outputValidation?: (output: OutputType) => void; 36 | /** Keywords are used to preserve phrases in the statement text when sanitizing it for a test. 
*/
    parserKeywords: ParserKeyword[];
} & ParserInitInput;

/** Default inputs: PDFs are read with readPdf unless a custom pdfProcessing is supplied. */
export const createStatementParserInputDefault: Required<
    Pick, 'pdfProcessing'>
> = {
    async pdfProcessing(filePath: string): Promise {
        return await readPdf(filePath);
    },
};

/**
 * Creates a statement parser out of state machine callbacks.
 *
 * The returned object exposes parsePdf, parseText, convertPdfToText and parserKeywords, plus
 * defaultParserOptions when they were provided in the inputs.
 *
 * @throws Error when no pdfProcessing method is available after merging in the defaults.
 */
export function createStatementParser<
    StateType,
    OutputType extends ParsedOutput,
    ParserOptions extends object | undefined = undefined,
>(
    rawInputs: Readonly>,
): Readonly> {
    const inputs: Readonly> = {
        ...createStatementParserInputDefault,
        ...rawInputs,
    };

    const pdfProcessing = inputs.pdfProcessing;

    if (!pdfProcessing) {
        throw new Error('Missing pdf processing method');
    }

    /** Runs the state machine over already extracted text lines and validates the output. */
    const parseText: ParseTextFunction = ({
        textLines,
        parserOptions: inputParserOptions,
        debug,
        name,
    }: ParseTextFunctionInput) => {
        const stateMachineInputs: Readonly<
            CreateStateMachineInput
        > = {
            // ParserInitInput is a subtype of inputs' type
            ...(inputs as ParserInitInput),
            name,
            debug,
            parserOptions: inputParserOptions,
        };

        const runStateMachine = createParserStateMachine(
            stateMachineInputs,
        );

        const output = runStateMachine(textLines);

        if (inputs.outputValidation) {
            inputs.outputValidation(output);
        }

        return output;
    };

    /** Reads the PDF pages and flattens them into a single list of lines. */
    const convertPdfToText: ConvertPdfToTextFunction = async (
        filePath: string,
    ): Promise => {
        const pdfPages = await pdfProcessing(filePath);
        const textLines = flatten2dArray(pdfPages);

        return textLines;
    };

    /** Converts the PDF at filePath into text lines and parses them. */
    const parsePdf: ParsePdfFunction = async ({
        filePath,
        parserOptions: inputParserOptions,
        debug,
        name,
    }: Readonly>) => {
        const textLines = await convertPdfToText(filePath);

        return parseText({
            parserOptions: inputParserOptions,
            debug,
            // honor the optional name input; the file path is only the fallback
            name: name ?? filePath,
            textLines,
        });
    };

    const defaultParserOptionsWrapper = inputs.defaultParserOptions
        ? {defaultParserOptions: inputs.defaultParserOptions}
        : {};

    const returnValue: Readonly> = {
        parsePdf,
        parseText,
        convertPdfToText,
        parserKeywords: inputs.parserKeywords,
        ...defaultParserOptionsWrapper,
    };

    return returnValue;
}
-------------------------------------------------------------------------------- /src/pdf/generate-pdf.test.ts: --------------------------------------------------------------------------------
import {join} from 'path';
import {testGroup, TestInputObject} from 'test-vir';
import {tempOutputDir} from '../repo-paths';
import {generatePdfDocument} from './generate-pdf';
import {checkThatPdfExists, readPdf} from './read-pdf';

let testCounter = 0;

function generateGeneratePdfTest(
    description: string,
    inputText: string[],
    expect?: string[],
): TestInputObject {
    return {
        description,
        expect: expect ??
inputText,
        test: async () => {
            const outputFilePath = join(
                tempOutputDir,
                `generate-pdf-text-output-${testCounter++}.pdf`,
            );

            await generatePdfDocument(inputText, outputFilePath);

            checkThatPdfExists(outputFilePath);

            const writtenText = (await readPdf(outputFilePath))[0];

            return writtenText;
        },
    };
}

testGroup({
    description: generatePdfDocument.name,
    tests: (runTest) => {
        runTest(generateGeneratePdfTest('simple text', ['hello there']));
        runTest(
            generateGeneratePdfTest('multiple lines of text', [
                'hello there',
                'you are a bold one',
                'you are doomed',
                'oh I do not think so',
                'indeed',
            ]),
        );
        runTest(
            generateGeneratePdfTest(
                'long line of text',
                [
                    'hello there you are a bold one you are doomed oh I do not think so indeed so uncivilized',
                ],
                [
                    'hello there you are a bold one you are ',
                    'doomed oh I do not think so indeed so ',
                    'uncivilized',
                ],
            ),
        );
    },
});
-------------------------------------------------------------------------------- /src/pdf/generate-pdf.ts: --------------------------------------------------------------------------------
import {createWriteStream, ensureDir} from 'fs-extra';
import {dirname} from 'path';
import PDFDocument from 'pdfkit';

/**
 * Writes the given text into a new PDF file.
 *
 * @param inputText Text to write. An array is joined with newlines.
 * @param outputFilePath Path of the PDF file to create; its parent directory is created if needed.
 * @param fontSize Font size used for the text.
 * @returns A promise that resolves with outputFilePath once the output stream has closed, and
 *   rejects when the directory cannot be created or the stream emits an error.
 */
export async function generatePdfDocument(
    inputText: string[] | string,
    outputFilePath: string,
    fontSize: number = 25,
): Promise<string> {
    const text = Array.isArray(inputText) ? inputText.join('\n') : inputText;

    // Awaited outside of the Promise executor: inside an async executor a failure here would turn
    // into an unhandled rejection and the returned promise would never settle.
    await ensureDir(dirname(outputFilePath));

    return new Promise<string>((resolve, reject) => {
        const pdf = new PDFDocument();
        const outputStream = createWriteStream(outputFilePath);

        // listeners are attached before any data is piped so no event can be missed
        outputStream.addListener('close', () => {
            resolve(outputFilePath);
        });
        outputStream.addListener('error', (error) => {
            reject(error);
        });

        pdf.pipe(outputStream);

        pdf.fontSize(fontSize).text(text, 100, 100);

        pdf.end();
    });
}
-------------------------------------------------------------------------------- /src/pdf/read-pdf.test.ts: --------------------------------------------------------------------------------
import {collapseSpaces} from 'augment-vir';
import {join} from 'path';
import {testGroup} from 'test-vir';
import {sampleFilesDir} from '../repo-paths';
import {readPdf} from './read-pdf';

testGroup({
    description: readPdf.name,
    tests: (runTest) => {
        runTest({
            description: 'can read pdfkit output',
            expect: 'Some text with an embedded font! PNG and JPEG images:',
            test: async () => {
                const pages = await readPdf(join(sampleFilesDir, 'pdfkit-out.pdf'));
                return collapseSpaces((pages[0] || []).join(' '));
            },
        });
    },
});
-------------------------------------------------------------------------------- /src/pdf/read-pdf.ts: --------------------------------------------------------------------------------
import {existsSync} from 'fs-extra';
import {readPdfText} from 'pdf-text-reader';
import {getDocument, VerbosityLevel} from 'pdfjs-dist/legacy/build/pdf';
import {DocumentInitParameters, PDFDocumentProxy} from 'pdfjs-dist/types/src/display/api';

/** Reads the PDF at the given path and returns the lines of each page. Throws if the file is missing. */
export async function readPdf(path: string): Promise {
    checkThatPdfExists(path);
    return (await readPdfText(createSource(path))).map((page) => page.lines);
}

/** Loads the PDF at the given path as a pdfjs document. Throws if the file is missing. */
export async function getPdfDocument(path: string): Promise {
    checkThatPdfExists(path);
    return await getDocument(createSource(path)).promise;
}

/** Builds the pdfjs source parameters for a file path, with verbosity limited to errors. */
function createSource(path: string): DocumentInitParameters {
    return {url: path, verbosity: VerbosityLevel.ERRORS};
}

/** Throws an error when no file exists at the given path. */
export function checkThatPdfExists(filePath: string): void {
    if
(!existsSync(filePath)) { 22 | throw new Error(`PDF file "${filePath}" does not exist`); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/readme-examples/all-options.example.ts: -------------------------------------------------------------------------------- 1 | import {parsePdfs, ParserType} from '..'; 2 | 3 | parsePdfs([ 4 | { 5 | parserInput: { 6 | /** FilePath is always required. What would the parser do without it? */ 7 | filePath: 'my/paypal/file.pdf', 8 | /** 9 | * Optional name property to help identify the pdf if any errors occur. (By default file 10 | * paths will be used in errors so this is only for human readability if desired.) 11 | */ 12 | name: 'pdf with all options', 13 | /** 14 | * Optional debug property to see LOTS of output which shows the internal state machine 15 | * progressing over each line of the file. 16 | */ 17 | debug: true, 18 | /** 19 | * Optional input that provides additional parser configuration. Each parser type has 20 | * slightly different parser options. 21 | */ 22 | parserOptions: { 23 | /** Every parser includes this option. See Year prefix section in the README for details. */ 24 | yearPrefix: 19, 25 | }, 26 | }, 27 | /** Type is always required. Without it, the package doesn't know which parser to use. */ 28 | type: ParserType.Paypal, 29 | }, 30 | { 31 | parserInput: { 32 | filePath: 'my/chase-prime-visa-credit/file.pdf', 33 | parserOptions: { 34 | /** 35 | * Example of an extra ParserType specific option that will change the parsing 36 | * behavior. This option is not valid for any of the other parser types except for 37 | * the ParserType.ChasePrimeVisaCredit parser. 
38 | */ 39 | includeMultiLineDescriptions: true, 40 | }, 41 | }, 42 | type: ParserType.ChasePrimeVisaCredit, 43 | }, 44 | ]).then((result) => console.log(result)); 45 | -------------------------------------------------------------------------------- /src/readme-examples/api-simple-parse-inputs.example.ts: -------------------------------------------------------------------------------- 1 | import {ParserType, StatementPdf} from '..'; 2 | 3 | const myPdfToParse: StatementPdf = { 4 | parserInput: { 5 | /** 6 | * This is the only necessary parserInput property. For more examples of parserInput (such 7 | * as parserOptions), see the Examples section in the README. 8 | */ 9 | filePath: 'my/file/path.pdf', 10 | }, 11 | /** 12 | * Any ParserType can be assigned to the "type" property. See the Parsers section in the README 13 | * for more information. 14 | */ 15 | type: ParserType.CitiCostcoVisaCredit, 16 | }; 17 | -------------------------------------------------------------------------------- /src/readme-examples/api-simple-parse.example.ts: -------------------------------------------------------------------------------- 1 | import {parsePdfs, ParserType} from '..'; 2 | 3 | parsePdfs([ 4 | { 5 | parserInput: { 6 | filePath: 'files/downloads/myPdf.pdf', 7 | }, 8 | type: ParserType.ChasePrimeVisaCredit, 9 | }, 10 | ]).then((results) => console.log(results)); 11 | -------------------------------------------------------------------------------- /src/readme-examples/better-async.example.ts: -------------------------------------------------------------------------------- 1 | import {parsePdfs, ParserType} from '..'; 2 | 3 | async function main() { 4 | const results = await parsePdfs([ 5 | { 6 | parserInput: { 7 | filePath: 'my/paypal/file.pdf', 8 | }, 9 | type: ParserType.Paypal, 10 | }, 11 | ]); 12 | 13 | // do something with the result 14 | 15 | return results; 16 | } 17 | 18 | if (require.main === module) { 19 | main().catch((error) => { 20 | console.error(error); 21 | 
process.exit(1); 22 | }); 23 | } 24 | -------------------------------------------------------------------------------- /src/readme-examples/direct-parsing.example.ts: -------------------------------------------------------------------------------- 1 | import {parsers, ParserType} from '..'; 2 | 3 | const parser = parsers[ParserType.Paypal]; 4 | parser.parsePdf({filePath: 'my/paypal/file.pdf'}).then((result) => console.log(result)); 5 | -------------------------------------------------------------------------------- /src/readme-examples/direct-text-parsing.example.ts: -------------------------------------------------------------------------------- 1 | import {parsers, ParserType} from '..'; 2 | 3 | const parser = parsers[ParserType.Paypal]; 4 | parser.parseText({textLines: ['text here', 'line 2 here', 'line 3', 'etc.']}); 5 | -------------------------------------------------------------------------------- /src/readme-examples/parser-type.example.ts: -------------------------------------------------------------------------------- 1 | import {ParserType} from '..'; 2 | 3 | // possible ParserType keys 4 | ParserType.ChasePrimeVisaCredit; 5 | ParserType.CitiCostcoVisaCredit; 6 | ParserType.UsaaBank; 7 | ParserType.UsaaVisaCredit; 8 | ParserType.Paypal; 9 | -------------------------------------------------------------------------------- /src/repo-paths.test.ts: -------------------------------------------------------------------------------- 1 | import {join} from 'path'; 2 | import {testGroup} from 'test-vir'; 3 | import {filesDir, getAllRecursiveFiles} from './repo-paths'; 4 | 5 | testGroup({ 6 | description: getAllRecursiveFiles.name, 7 | tests: (runTest) => { 8 | runTest({ 9 | description: 'should get all files', 10 | expect: new Set([ 11 | join('example-sub-dir', 'example-sub-sub-dir', 'sub-sub-nothing'), 12 | join('example-sub-dir', 'sub-nothing'), 13 | join('example-sub-dir', 'sub-nothing-2'), 14 | join('example-sub-dir-2', 'sub-2-nothing'), 15 | 'README.md', 16 | 
'top-nothing',
            ]),
            test: async () => {
                return await getAllRecursiveFiles(join(filesDir, 'example-dir'));
            },
        });
    },
});
-------------------------------------------------------------------------------- /src/repo-paths.ts: --------------------------------------------------------------------------------
import {readdir, stat} from 'fs-extra';
import {join} from 'path';

/**
 * Path to the repo's root. Does not use the package name because the source code could
 * theoretically be cloned into any folder. "src" is used for the ts source code files (so they CAN
 * be run directly without transpiling it into JS) and "dist" is used for the transpiled JS output directory.
 */
export const repoRootDir = __dirname.replace(/(?:src|dist).*/, '');

export const filesDir = join(repoRootDir, 'files');

export const sampleFilesDir = join(filesDir, 'sample-files');
export const dummyPdfPath = join(sampleFilesDir, 'dummy.pdf');
export const sanitizedFilesDir = join(sampleFilesDir, 'sanitized');

export const tempOutputDir = join(filesDir, 'temp-output');
export const temp_sanitizerRawTestFilePath = join(tempOutputDir, 'last-raw-text-for-sanitizer.txt');
export const temp_sanitizerSanitizedTextFilePath = join(tempOutputDir, 'last-sanitized-text.txt');
export const prettierConfigPath = join(repoRootDir, '.prettierrc.js');

export const packageJson = join(repoRootDir, 'package.json');

/**
 * Collects every file underneath the given directory, recursing into sub directories.
 *
 * @param parentDirectory Directory to walk.
 * @param includeFolders When true, directories are added to the result as well, at every depth.
 * @returns A set of paths relative to parentDirectory.
 */
export async function getAllRecursiveFiles(
    parentDirectory: string,
    includeFolders = false,
): Promise<Set<string>> {
    const relativePaths = new Set<string>();

    // entries are handled one after another, in the order readdir returned them
    for (const child of await readdir(parentDirectory)) {
        const childPath = join(parentDirectory, child);

        if ((await stat(childPath)).isDirectory()) {
            if (includeFolders) {
                relativePaths.add(child);
            }
            // forward includeFolders so nested folders are reported too
            const descendants = await getAllRecursiveFiles(childPath, includeFolders);
            descendants.forEach((descendant) => relativePaths.add(join(child, descendant)));
        } else {
            relativePaths.add(child);
        }
    }

    return relativePaths;
}
-------------------------------------------------------------------------------- /src/sanitizer/sanitize-for-test-file-cli.test.ts: --------------------------------------------------------------------------------
import {existsSync, move} from 'fs-extra';
import {basename, join} from 'path';
import {resolveTestGroups, testGroup} from 'test-vir';
import {ParserType} from '../parser/all-parsers';
import {generatePdfDocument} from '../pdf/generate-pdf';
import {dummyPdfPath, tempOutputDir} from '../repo-paths';
import {CliErrors, sanitizeForTestFileCli} from './sanitize-for-test-file-cli';
import {createSanitizedTestInput} from './sanitized-test';

/**
 * Creates a test callback that runs the sanitization CLI with the given args, runs the generated
 * sanitized test file as an inner test, and then moves that file into the temp output directory.
 */
function testTempOutputFile(args: string[]) {
    return async () => {
        const output = await sanitizeForTestFileCli(args, false);
        if (typeof output === 'string') {
            throw new Error(`Sanitization CLI didn't produce proper output.`);
        }
        const {sanitizedTestFilePath: filePath} = output;

        const testInput = createSanitizedTestInput(filePath);

        const testGroupOutput = testGroup({
            description: `inner test for ${filePath}`,
            tests: (runTest) => {
                runTest(testInput);
            },
        });

        await resolveTestGroups(testGroupOutput);

        // if the output file was created, move it into the temp output directory
        if (existsSync(filePath)) {
            const movePath = join(tempOutputDir, basename(filePath));
            await move(filePath, movePath, {
                overwrite: true,
            });
        } else {
            throw new Error(`output file was not created: ${filePath}`);
        }
    };
}

testGroup((runTest) => {
    runTest({
        expectError: {
            errorMessage: CliErrors.MissingParserType,
        },
        description: 'api rejects missing parser type',
test: testTempOutputFile([]), 48 | }); 49 | runTest({ 50 | expectError: { 51 | errorMessage: CliErrors.InvalidParserType('invalid'), 52 | }, 53 | description: 'api rejects invalid parser type', 54 | test: testTempOutputFile(['invalid']), 55 | }); 56 | runTest({ 57 | expectError: { 58 | errorMessage: CliErrors.MissingPdfPath, 59 | }, 60 | description: 'api rejects missing PDF file path', 61 | test: testTempOutputFile([ParserType.Paypal]), 62 | }); 63 | runTest({ 64 | expectError: { 65 | errorMessage: CliErrors.InvalidPdfPath('missing-file'), 66 | }, 67 | description: 'api rejects invalid PDF file path', 68 | test: testTempOutputFile([ParserType.Paypal, 'missing-file']), 69 | }); 70 | runTest({ 71 | expectError: { 72 | errorMessage: `PDF file "missing-file.pdf" does not exist`, 73 | }, 74 | description: 'api rejects PDF file path that is not on disk', 75 | test: testTempOutputFile([ParserType.Paypal, 'missing-file.pdf']), 76 | }); 77 | runTest({ 78 | expectError: { 79 | errorMessage: CliErrors.MissingOutputFileName, 80 | }, 81 | description: 'api rejects missing output file name', 82 | test: testTempOutputFile([ParserType.Paypal, dummyPdfPath]), 83 | }); 84 | runTest({ 85 | expectError: { 86 | errorMessage: CliErrors.InvalidOutputFileName('output-file'), 87 | }, 88 | description: 'api rejects invalid output file name', 89 | test: testTempOutputFile([ParserType.Paypal, dummyPdfPath, 'output-file']), 90 | }); 91 | runTest({ 92 | description: 'parse dummy pdf file', 93 | expectError: { 94 | errorMessage: 95 | /Failed to parse the original PDF before trying to sanitize it: Error: EndStateNotReachedError: Reached end of input before hitting end state\. 
.+/, 96 | }, 97 | test: testTempOutputFile([ParserType.Paypal, dummyPdfPath, 'dummy-output-file.json']), 98 | }); 99 | runTest({ 100 | expectError: { 101 | errorMessage: CliErrors.InvalidDebugFlag('blahBlah'), 102 | }, 103 | description: 'api rejects invalid debug flag', 104 | test: testTempOutputFile([ 105 | ParserType.Paypal, 106 | dummyPdfPath, 107 | 'dummy-output-file.json', 108 | 'blahBlah', 109 | ]), 110 | }); 111 | 112 | async function testDebug(debug: boolean) { 113 | async function generateSampleValidPdf() { 114 | return await generatePdfDocument( 115 | [ 116 | 'statement closing date 4/5/6', 117 | 'Account Number 7 y z 8', 118 | 'Transactions', 119 | 'Payments and Credits', 120 | 'trans date post date', 121 | 'total payments and credits for this period $', 122 | 'fees', 123 | ], 124 | join(tempOutputDir, 'dummy-usaa-visa-credit.pdf'), 125 | 12, 126 | ); 127 | } 128 | 129 | const oldLog = console.log; 130 | const logs: any[] = []; 131 | console.log = function () { 132 | logs.push(arguments); 133 | }; 134 | 135 | try { 136 | await testTempOutputFile([ 137 | ParserType.UsaaVisaCredit, 138 | await generateSampleValidPdf(), 139 | 'dummy-output-file.json', 140 | debug ? 
'--debug' : '', 141 | ])(); 142 | } catch (error) { 143 | logs.forEach((args) => oldLog(...args)); 144 | throw error; 145 | } finally { 146 | console.log = oldLog; 147 | } 148 | 149 | return logs; 150 | } 151 | 152 | runTest({ 153 | description: 'does not print debug info when the debug flag is missing', 154 | expect: [], 155 | test: async () => { 156 | return await testDebug(false); 157 | }, 158 | }); 159 | 160 | runTest({ 161 | description: 'prints debug info when the debug flag is used', 162 | expect: true, 163 | test: async () => { 164 | return (await testDebug(true)).length > 20; 165 | }, 166 | }); 167 | }); 168 | -------------------------------------------------------------------------------- /src/sanitizer/sanitize-for-test-file-cli.ts: -------------------------------------------------------------------------------- 1 | import {getEnumTypedValues} from 'augment-vir'; 2 | import {extname, relative} from 'path'; 3 | import {isParserType, ParserType} from '../parser/all-parsers'; 4 | import {StatementPdf} from '../parser/parse-api'; 5 | import {checkThatPdfExists} from '../pdf/read-pdf'; 6 | import {repoRootDir} from '../repo-paths'; 7 | import {writeSanitizedTestFile} from './sanitized-test'; 8 | 9 | const parserArgExpected = `Expected one of the following: ${getEnumTypedValues(ParserType).join( 10 | ', ', 11 | )}`; 12 | 13 | export const CliErrors = { 14 | MissingParserType: `Missing parser type arg. ${parserArgExpected}`, 15 | InvalidParserType(inputParserType: string) { 16 | return `Invalid parser type "${inputParserType}". ${parserArgExpected}`; 17 | }, 18 | MissingPdfPath: `Missing input PDF file path.`, 19 | InvalidPdfPath(inputPdfPath: string) { 20 | return `Invalid PDF file path "${inputPdfPath}". Missing .pdf extension.`; 21 | }, 22 | MissingOutputFileName: `Missing output file name`, 23 | InvalidOutputFileName(inputOutputFileName: string) { 24 | return `Invalid output file name "${inputOutputFileName}". 
/**
 * Builds the parser input from already-validated CLI args and writes the sanitized test file.
 *
 * @returns The path of the written sanitized test file and the result reported by
 *   writeSanitizedTestFile.
 */
async function runSanitization(cliArgs: CliArgs) {
    // the PDF path is handed to the parser relative to the repo root
    const pdfPathFromRepoRoot = relative(repoRootDir, cliArgs.inputPdfFile);

    const statementPdf: StatementPdf = {
        parserInput: {
            filePath: pdfPathFromRepoRoot,
            debug: cliArgs.debug,
        },
        type: cliArgs.parserType,
    };

    const written = await writeSanitizedTestFile(
        statementPdf,
        cliArgs.outputFileName,
        cliArgs.debug,
    );

    return {
        sanitizedTestFilePath: written.path,
        result: written.result,
    };
}
// Both lines of the usage text use "npm run sanitize": "sanitize" is an npm script, not an npm command.
const helpMessage = `Usage: npm run sanitize parser-type input-pdf-file.pdf output-sanitized-text-file.json [-- --debug]\nMake sure to pass -- before the debug input, like so: npm run sanitize x x x -- --debug`;

/**
 * Entry point for the sanitizer CLI. Exported just so we can test it without running bash scripts.
 *
 * @param args Raw CLI arguments: parser type, input PDF path, output file name and an optional
 *   --debug flag, in that order.
 * @param printHelp When true, the usage message is logged if argument validation fails.
 * @returns The usage message (a string) when help was requested, otherwise the sanitized test file
 *   path together with the sanitization result.
 * @throws Rethrows any argument validation error, and any error raised while sanitizing.
 */
export async function sanitizeForTestFileCli(args: string[], printHelp = true) {
    const helpFlags = ['-h', 'help', '--help'];
    if (helpFlags.some((helpFlag) => args.includes(helpFlag))) {
        return helpMessage;
    }

    let validatedArgs: CliArgs;
    try {
        validatedArgs = getValidatedArgs(args);
    } catch (error) {
        // show the usage text next to the validation error, then let the caller handle the error
        if (printHelp) {
            console.log(helpMessage);
        }
        throw error;
    }

    if (validatedArgs.debug) {
        console.log({validatedArgs});
    }

    const results = await runSanitization(validatedArgs);

    if (validatedArgs.debug) {
        console.log('Results:', results.result);
    }
    return results;
}
// Verifies that writeSanitizedTestFile rejects a PDF path that does not exist, for every parser type.
testGroup({
    description: writeSanitizedTestFile.name,
    tests: (runTest) => {
        const missingFileName = 'missingFi';

        getEnumTypedValues(ParserType).forEach((singleParserType) => {
            runTest({
                expectError: {
                    errorMessage: `PDF file "${missingFileName}" does not exist`,
                },
                description: `fails on missing file for ${singleParserType}`,
                test: async () => {
                    const fakeOutputFile = 'please-no-write-here.txt';
                    const {path: writtenFilePath} = await writeSanitizedTestFile(
                        {
                            parserInput: {filePath: missingFileName},
                            type: singleParserType,
                        },
                        fakeOutputFile,
                    );
                    /**
                     * Just in case the file actually gets written. The returned path is used
                     * because the file is written into a per-parser sub directory of the
                     * sanitized files directory, not directly into the sample files directory.
                     */
                    unlinkSync(writtenFilePath);
                },
            });
        });
    },
});
/**
 * Re-parses the original PDF and runs quick sanity checks that compare it with the output parsed
 * from the sanitized text: the number of incomes and the number of expenses must both match.
 *
 * NOTE(review): if the original signature carries generic type parameters, keep them as they are
 * in the file — confirm before applying.
 *
 * @param parsedSanitized Output that was parsed from the sanitized text.
 * @param debug When true, the mismatching lists are logged before the error is thrown. This flag
 *   only controls logging; a mismatch always throws.
 * @throws Error if the incomes count or the expenses count differs from the original.
 */
async function validateSanitizedParsing(
    {parserInput, type: parserType}: SanitizingStatementPdf,
    parsedSanitized: ParsedOutput,
    debug: boolean,
): Promise<void> {
    const parser = parsers[parserType];
    if (debug) {
        console.log('\n/////////////////// parsing original:\n');
    }
    const parsedOriginal = await parser.parsePdf(parserInput);

    // quick sanity checks on the sanitized parsing output
    const checkedLists = ['incomes', 'expenses'] as const;

    for (const listName of checkedLists) {
        const sanitizedList = parsedSanitized[listName];
        const originalList = parsedOriginal[listName];

        if (sanitizedList.length === originalList.length) {
            continue;
        }

        if (debug) {
            console.log(`/////////////////// sanitized ${listName}`);
            console.log(sanitizedList);
            console.log(`/////////////////// original ${listName}`);
            console.log(originalList);
        }
        // thrown regardless of the debug flag so that a mismatch can never pass silently
        throw new Error(
            `Sanitized ${listName} count did not match the original in "${parserInput.name}"`,
        );
    }
}
{output: parsedSanitized} 108 | : { 109 | errorMessage: 110 | parseError?.message || 111 | 'Sanitized parser output is undefined but no error was encountered', 112 | }), 113 | }; 114 | 115 | return sanitizedTestObject; 116 | } 117 | 118 | export async function writeSanitizedTestFile( 119 | rawStatementPdf: StatementPdf, 120 | outputFileName: string, 121 | debug: boolean = rawStatementPdf.parserInput.debug || false, 122 | ) { 123 | const sampleFilePath = join(sanitizedFilesDir, rawStatementPdf.type, outputFileName); 124 | 125 | const statementPdf: SanitizingStatementPdf = { 126 | ...rawStatementPdf, 127 | parserInput: { 128 | ...rawStatementPdf.parserInput, 129 | name: rawStatementPdf.parserInput.name ?? getSanitizedName(sampleFilePath), 130 | debug, 131 | }, 132 | }; 133 | checkThatPdfExists(statementPdf.parserInput.filePath); 134 | 135 | // first, make sure the pdf itself passes parsing 136 | try { 137 | await parsers[statementPdf.type].parsePdf({...statementPdf.parserInput, debug}); 138 | } catch (error) { 139 | throw new Error( 140 | `Failed to parse the original PDF before trying to sanitize it: ${ 141 | error instanceof Error && error.stack ? 
/**
 * Creates a test input which re-parses the text stored in a sanitized test file and compares the
 * result with what the file recorded: either the parsed output or an error message.
 *
 * NOTE(review): the generic type arguments below were reconstructed — confirm they match the
 * original declarations.
 *
 * @param filePath Path to a sanitized test JSON file.
 * @returns A test input object that can be passed directly to runTest.
 */
export function createSanitizedTestInput(
    filePath: string,
): TestInputObject<Readonly<ParsedOutput>, Error> {
    const testFile: SanitizedTestFile<ParserType> = JSON.parse(
        readFileSync(filePath).toString(),
    );
    const parser = parsers[testFile.parserType];

    const expectation = testFile.output
        ? {expect: testFile.output}
        : {expectError: {errorMessage: testFile.errorMessage}};

    const testInput: TestInputObject<any, Error> = {
        test: () => {
            setSanitizerMode();
            try {
                const reParsedOutput = parser.parseText({
                    textLines: testFile.text,
                    parserOptions: testFile.parserOptions,
                    name: testFile.name,
                });

                /**
                 * Make sure all the values are JSON values. For example, properties with the
                 * value of undefined are removed and invalid date values are turned into null
                 */
                return JSON.parse(JSON.stringify(reParsedOutput));
            } finally {
                // Always reset the mode, even when parsing throws (which is the expected path
                // for test files that record an errorMessage), so it can't leak into other tests.
                unsetSanitizerMode();
            }
        },
        description: `compared sanitized file "${filePath}"`,
        ...expectation,
    };

    return testInput;
}
); 67 | 68 | const keywords = ['account number'] as const; 69 | 70 | runTest({ 71 | description: 'output length should be the same', 72 | expect: unSanitized.length, 73 | test: () => { 74 | const sanitized = sanitizeStatementText(unSanitized, [], false); 75 | return sanitized.length; 76 | }, 77 | }); 78 | 79 | runTest({ 80 | description: 'original text should not exist', 81 | expect: 0, 82 | test: () => { 83 | const sanitized = sanitizeStatementText(unSanitized, [], false); 84 | 85 | const matchingRows = sanitized.filter( 86 | (sanitizedRow, index) => sanitizedRow === unSanitized[index], 87 | ); 88 | 89 | return matchingRows.length; 90 | }, 91 | }); 92 | 93 | runTest({ 94 | description: 'keywords should be preserved only in the row where they exist', 95 | expect: unSanitized.filter((dirtyRow) => dirtyRow.includes(keywords[0])).length, 96 | test: () => { 97 | const sanitized = sanitizeStatementText(unSanitized, keywords, false); 98 | 99 | const matchingRows = sanitized.filter((sanitizedRow) => 100 | sanitizedRow.includes(keywords[0]), 101 | ); 102 | 103 | return matchingRows.length; 104 | }, 105 | }); 106 | 107 | sanitizerTest( 108 | unSanitized, 109 | trimArrayStrings( 110 | ` 111 | a account number 1-2-3 112 | $4 k 113 | $5 n 6 114 | `.split('\n'), 115 | ), 116 | keywords, 117 | 'should replace text in a deterministic and unique manner', 118 | ); 119 | 120 | sanitizerTest( 121 | unSanitized, 122 | trimArrayStrings( 123 | ` 124 | a account number 1-2-3 125 | $4 super secret purchase k 126 | $5 n 6 127 | `.split('\n'), 128 | ), 129 | [...keywords, /super \S+ purchase/], 130 | 'should work with RegExp keywords', 131 | ); 132 | } 133 | 134 | sanitizerTest( 135 | [' super duper thing', ' secret stuff delete '], 136 | [' super duper thing', ' f '], 137 | [/\s+super duper thing/], 138 | 'RegExp keywords should work with preceding spaces', 139 | ); 140 | 141 | sanitizerTest( 142 | [ 143 | ' 5678 one thing 9876 9999 10.63 95632 cow 789', 144 | ' Van 9876 11.11 cow', 145 | 
#!/usr/bin/env bash
# Compiles the project and runs the tests that belong to a single source file.
#
# Usage: ./test-specific-file.sh src/path/to/file.ts
#
# The argument may be the test file itself (file.test.ts) or the file under test (file.ts).
set -e;

if [[ -z "$1" ]]; then
    echo -e "\033[1;31mMissing file path. Usage: $0 src/path/to/file.ts\033[0m"
    exit 1
fi

filePath="$1"
filePath="${filePath#"./"}"
filePath="${filePath#"/"}"

# map the TypeScript source path to its transpiled JavaScript file name
# (suffix matching is literal here, so only a real ".ts" extension is rewritten)
jsFilePath="$filePath"
if [[ "$jsFilePath" == *.ts ]]; then
    jsFilePath="${jsFilePath%.ts}.js"
fi

# when given the file under test, point at its sibling test file instead
if [[ "$jsFilePath" == *.js && "$jsFilePath" != *.test.js ]]; then
    jsFilePath="${jsFilePath%.js}.test.js"
fi

distPath="dist/${jsFilePath#"src/"}"

# compile first so that the existence check below sees fresh output instead of a stale (or
# missing) dist directory
npm run compile

if [[ ! -f "$distPath" ]]; then
    echo -e "\033[1;31mThere are no tests for $filePath\033[0m"
    exit 1
fi

npx test-vir "$distPath"