├── .envrc ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── beanborg ├── __init__.py ├── arg_parser.py ├── bb_archive.py ├── bb_import.py ├── bb_mover.py ├── classification │ ├── __init__.py │ ├── classifier.py │ ├── custom_fuzzy_wordf_completer.py │ ├── data_loader.py │ ├── gpt_service.py │ ├── transaction_model.py │ └── ui_service.py ├── config.py ├── handlers │ ├── __init__.py │ └── amount_handler.py ├── importer.py ├── model │ ├── __init__.py │ └── transactions.py ├── rule_engine │ ├── Context.py │ ├── __init__.py │ ├── decision_tables.py │ ├── rules.py │ └── rules_engine.py └── utils │ ├── __init__.py │ ├── duplicate_detector.py │ ├── hash_utils.py │ ├── journal_utils.py │ └── string_utils.py ├── bin ├── bb_archive ├── bb_import └── bb_mover ├── requirements.txt ├── run_tests.sh ├── setup.py ├── tests ├── files │ ├── 1234.ldg │ ├── My_Custom_Rule.py │ ├── _1234.ldg │ ├── account.rules │ ├── amount_handler.yaml │ ├── asset.rules │ ├── bank1.yaml │ ├── bank1_custom_rule.yaml │ ├── bank1_ignore_at_pos.yaml │ ├── bank1_ignore_by_counterparty.yaml │ ├── bank1_ignore_contains_string_at_pos.yaml │ ├── bank1_replace_asset.yaml │ ├── bank1_replace_counterparty.yaml │ ├── bank1_replace_expense.yaml │ ├── payee.rules │ └── payee_with_comments.rules ├── test_config.py ├── test_currency_handler.py ├── test_decision_tables.py ├── test_duplicate_detector.py └── test_rules_engine.py ├── tox.ini └── tutorial ├── README.md ├── UK0000001444555.ldg ├── accounts.ldg ├── assets └── csv.png ├── main.ldg └── test-data └── eagle-bank-statement.csv /.envrc: -------------------------------------------------------------------------------- 1 | layout python 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | .direnv 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | eag.yaml 141 | tmp 142 | deploy_local.sh 143 | rules 144 | todo.txt 145 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
-------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Beanborg 2 | 3 | Beanborg automatically imports financial transactions from external CSV files into the [Beancount](https://github.com/beancount/beancount) bookkeeping system. It is designed to streamline transaction importing by matching data to the correct expense accounts and doing so quickly, even with multiple files. 4 | 5 | ## Requirements 6 | 7 | - Python 3 8 | - Beancount v2 9 | 10 | ## Goals and key features 11 | 12 | Beanborg has two main design goals: 13 | 14 | - automatic matching of transaction data with the correct Expense accounts 15 | - speed, capable of processing multiple financial CSV files in seconds. 16 | 17 | Example: 18 | 19 | Given the following transaction from a CSV file: 20 | 21 | ``` 22 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food Inc.";-21,30;EUR;0000001;UK0000001444555 23 | ``` 24 | 25 | Beanborg imports the transaction into Beancount and assigns the Account "Expense:Grocery" to the transaction: 26 | 27 | ``` 28 | 2020-11-04 * "Fresh Food Inc." "" 29 | csv: "04.11.2020,04.11.2020,Direct Debit,Fresh Food,-21,30,EUR,0000001,UK0000001444555" 30 | md5: "60a54f6ed13ae7b7e70fd475eb677511" 31 | Assets:Bank1:Bob:Current -21.30 EUR 32 | Expenses:Grocery 33 | ``` 34 | 35 | ## Additional features: 36 | 37 | - Extendable rule-based system for transaction categorization. 38 | - Duplicate transaction detection. 39 | - Transaction classification using machine learning (ML) and large language models (LLM) (optional). 40 | - Highly configurable with extensive rules. 41 | - Smart archiving: files are renamed with start and end dates after processing. 42 | 43 | 44 | ## Installation 45 | 46 | To install beanborg, use: 47 | 48 | ``` 49 | pip install git+https://github.com/luciano-fiandesio/beanborg.git 50 | ``` 51 | 52 | Fora specific branch: 53 | 54 | ``` 55 | pip install git+https://github.com/luciano-fiandesio/beanborg.git@BRANCH_NAME 56 | ``` 57 | 58 | ## Workflow 59 | 60 | Beanborg is based on a three-stage workflow: 61 | 62 | 1. Move the CSV file to the staging area. 63 | 2. Import the CSV into Beancount, categorizing transactions. 64 | 3. Archive the CSV after processing. 65 | 66 | ### Stage 1: Move Bank CSV File 67 | 68 | Move a bank CSV file to the staging area: 69 | 70 | ``` 71 | bb_mover -f ~/config/wells-fargo.yaml 72 | ``` 73 | 74 | ### Stage 2: Import the CSV into Beancount 75 | 76 | Import the CSV into Beancount, categorizing transactions: 77 | 78 | ``` 79 | bb_import -f ~/config/wells-fargo.yaml 80 | ``` 81 | 82 | ### Stage 3: Archive the CSV File 83 | Move the CSV file to the archive folder: 84 | 85 | ``` 86 | bb_archive -f ~/config/wells-fargo.yaml 87 | ``` 88 | 89 | ## Configuration 90 | 91 | Each financial institution requires a dedicated YAML configuration file that defines the structure of the CSV file and the rules applied during import. 
92 | 93 | ### Sample configuration file 94 | 95 | ``` 96 | --- !Config 97 | csv: 98 | download_path: "/home/mike/downloads" 99 | name: wells-fargo 100 | bank_ref: wfa 101 | date_format: "%d/%m/%Y" 102 | skip: 1 103 | 104 | indexes: 105 | date: 1 106 | amount: 2 107 | counterparty: 6 108 | 109 | rules: 110 | beancount_file: 'main-ledger.ldg' 111 | rules_file: wells-fargo.rules 112 | account: 565444499 113 | currency: USD 114 | ruleset: 115 | - Replace_Asset 116 | - Replace_Expense 117 | ``` 118 | 119 | ### Structure of a configuration file 120 | 121 | A Beanborg configuration must start with the `--- !Config` tag and has 3 main sections: 122 | 123 | #### csv 124 | 125 | The `csv` section of the configuration file determines the options related to the structure and location of the CSV file to import. 126 | Here is the list of options for the `csv` section: 127 | 128 | | Property | Description | Default | Example | 129 | |--------------------|-------------|---------|---------| 130 | | `download_path` | Full path to the folder to which the CSV is downloaded at the beginning of the import process. This option is only required by the `bb_mover` script. | | "/home/john/download" | 131 | | `name` | The name of the CSV file at the time of download. Note that the name can be partial. For instance, if the CSV file is named "bank1-statement-03-2020", the `name` can be simply set to `bank1`. This option is only required by the `bb_mover` script. | | `bank1` | 132 | | `ref` | Once the CSV file is imported into the staging area, it gets renamed using the value of `ref`. It is recommended to use a short string to identify the financial institution. This option is used by all the scripts. | | `com` | 133 | | `separator` | The field delimiter used in the financial institution's CSV file. | , | | 134 | | `currency_sep` | The decimal separator used in the CSV file | . | | 135 | | `date_format` | Date format used in the CSV file. The format is based on strftime directives: https://strftime.org/. Note that the value must be in quotes | | "%d/%m/%Y" | 136 | | `skip` | Number of lines of the CSV file to skip during import | 1 | | 137 | | `target` | The folder name or path to which the CSV file is moved during the first stage. | tmp | | 138 | | `archive` | The folder name or path to which the CSV file is archived during the archive stage | archive | | 139 | | `post_move_script` | Path to a post-move script that is executed after the CSV file is moved into the work folder. The script must use a `shebang` (e.g. `#!/bin/bash`) in order to be executed. | | `/home/tom/scripts/convert.sh` | 140 | | `keep_original` | Keep the CSV file from the `download_path`. The default is to delete it after the move process. This option is only required by the `bb_mover` script. | `False` | `True` | 141 | 142 | #### indexes 143 | 144 | The `indexes` section of the configuration file allows mapping each CSV "column" (or index) to the information required to parse and import the data. In other words, each option is used by Beanborg to determine where the `date` or `amount` of each transaction is located in the CSV file. 145 | 146 | Note that the first index starts from `0`.
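For example, for the sample transaction shown earlier in this README (`04.11.2020;04.11.2020;Direct Debit;"Fresh Food Inc.";-21,30;EUR;0000001;UK0000001444555`), a minimal `indexes` mapping might look like the sketch below. The column positions are inferred from that sample row and are purely illustrative; adjust them to your bank's CSV layout:

```yaml
indexes:
  date: 0          # 04.11.2020
  tx_type: 2       # Direct Debit
  counterparty: 3  # Fresh Food Inc.
  amount: 4        # -21,30
  currency: 5      # EUR
  account: 7       # UK0000001444555
```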
147 | 148 | | Property | Description | Default | 149 | |----------------|-------------|---------| 150 | | `date` | The index corresponding to the date of the transaction. | 0 | 151 | | `counterparty` | The index corresponding to the name of the counterparty of the transaction. | 3 | 152 | | `amount` | The index corresponding to the amount of the transaction (debit or credit). | 4 | 153 | | `account` | The index corresponding to the account of the transaction (e.g. the IBAN or ABA code). | 4 | 154 | | `currency` | The index corresponding to the currency of the transaction. | 5 | 155 | | `tx_type` | The index corresponding to the transaction type. | 2 | 156 | | `amount_in` | Some financial institutions use separate indexes for debit and credit. In this case, it is possible to specify the index corresponding to the credited amount. | | 157 | | `narration` | The index corresponding to the narration or reference field of the transaction. | | 158 | 159 | #### rules 160 | 161 | | Property | Description | Default | 162 | |--------------------------------|-------------|--------------------| 163 | | `beancount_file` | The master Beancount ledger file. This property is mandatory and is required by the duplicate detection mechanism. | `main.ldg` | 164 | | `rules_folder` | The folder name in which custom rules and look-up table files are stored | `rules` | 165 | | `account` | This property is normally used when a CSV file doesn't contain any account property (IBAN, ABA, account number, etc.). | | 166 | | `currency` | Force a default currency | | 167 | | `default_expense` | Default expense account | `Expenses:Unknown` | 168 | | `force_negative` | TODO | False | 169 | | `invert_negative` | TODO | False | 170 | | `origin_account` | Specifies the origin account of each transaction | | 171 | | `ruleset` | List of rules to apply to the CSV file. See the `Rules` section. | | 172 | | `advanced_duplicate_detection` | Enable the advanced duplicate detection rule (see Advanced Duplicate Detection section) | `true` | 173 | 174 | ## Rules 175 | 176 | Beanborg’s rules engine is highly customizable, allowing users to automate the categorization of transactions based on pre-existing rules. 177 | Each rule is referenced by name and can be used for tasks such as assigning accounts, ignoring transactions, or modifying transaction details like the counterparty's name. 178 | 179 | Some rules rely on **lookup tables**, which are semicolon-separated CSV files. These files contain three columns: `value`, `expression`, and `result`, allowing flexible criteria for matching and transforming data. 180 | 181 | - **value**: The string that the rule searches for. 182 | - **expression**: The matching criteria used by the rule, such as `equals`, `equals_ic`, `startsWith`, `endsWith`, `contains`, or `contains_ic`. 183 | - `equals_ic` and `contains_ic` are case-insensitive versions of `equals` and `contains`. 184 | - **result**: The output of the rule when a match is found.
185 | 186 | ### Example: Expense Categorization Rule 187 | 188 | For instance, if you want to categorize any transaction where the payee is "Walmart" under `Expenses:Groceries`, the lookup entry would be: 189 | 190 | `Walmart;equals;Expenses:Groceries` 191 | 192 | 193 | To ensure that any variation of "Walmart," regardless of case, is also matched, you can use: 194 | 195 | `Walmart;contains_ic;Expenses:Groceries` 196 | 197 | The `_ic` indicates `ignore case`. 198 | 199 | The following sections provide a detailed explanation of the rules available in Beanborg. 200 | 201 | #### Replace_Payee 202 | 203 | The `Replace_Payee` rule is used to modify the name of a transaction’s counterparty. This is useful when you want to standardize or adjust the names in your financial records. 204 | 205 | This rule requires a lookup file named `payee.rules`, which should be placed in the directory defined by the `rules.rules_folder` option in the configuration file. 206 | 207 | Suppose you want to modify a transaction where the counterparty is listed as "Fresh Food Inc." and replace it with "FRESH FOOD" when importing the data into the ledger. 208 | 209 | Given the following CSV transaction: 210 | 211 | ``` 212 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food Inc.";-21,30;EUR;0000001;UK0000001444555 213 | ``` 214 | 215 | You would follow these steps: 216 | 217 | 1. Add the `Replace_Payee` rule to the list of rules in the configuration file for the relevant financial institution. 218 | 2. In the `payee.rules` lookup file, add the following entry: 219 | 220 | ``` 221 | Fresh Food Inc.;equals;FRESH FOOD 222 | ``` 223 | 224 | This will ensure that the counterparty "Fresh Food Inc." is replaced with "FRESH FOOD" in your Beancount ledger. 225 | 226 | 227 | #### Replace_Expense 228 | 229 | The `Replace_Expense` rule is used to assign an account to a transaction based on the value of the `counterparty` index from the CSV file. This rule is particularly helpful for categorizing transactions into the appropriate expense accounts. 230 | 231 | This rule requires a lookup file named `account.rules`, which should be located in the directory defined by the `rules.rules_folder` option in the configuration file. 232 | 233 | Suppose you want to categorize a transaction where the counterparty is "Fresh Food Inc." under the account `Expenses:Grocery` when importing the data into Beancount. 234 | 235 | Given the following CSV transaction: 236 | 237 | ``` 238 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food Inc.";-21,30;EUR;0000001;UK0000001444555 239 | ``` 240 | 241 | You would follow these steps: 242 | 243 | 1. Add the `Replace_Expense` rule to the list of rules in the configuration file for the relevant financial institution. 244 | 2. In the `account.rules` lookup file, add the following entry: 245 | 246 | ``` 247 | Fresh Food Inc.;equals;Expenses:Groceries 248 | ``` 249 | 250 | This will ensure that any transaction with "Fresh Food Inc." as the counterparty will be assigned to the `Expenses:Grocery` account in your Beancount ledger. 251 | 252 | 253 | #### Replace_Asset 254 | 255 | 256 | The `Replace_Asset` rule assigns an "origin" account to a transaction based on the value of the `account` index in a CSV file. 257 | This rule is useful for ensuring that transactions are recorded with the correct source account in Beancount. 258 | 259 | The `Replace_Asset` rule is automatically added to the ruleset, even if it is not explicitly declared in the configuration file. 
260 | 261 | ##### Origin Account Resolution 262 | 263 | The rule can resolve the origin account in two ways: 264 | 265 | 1. Using a lookup file named `asset.rules`, located in the directory defined by the `rules.rules_folder` option in the config file. 266 | 2. Using the `rules.origin_account` property specified directly in the configuration file. 267 | 268 | Suppose you want to import the following CSV transaction and assign the origin account as `Assets:Jim:Current`: 269 | 270 | ``` 271 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food Inc.";-21,30;EUR;0000001;UK0000001444555 272 | ``` 273 | 274 | ##### Steps: 275 | 276 | 1. Create an `asset.rules` lookup file and add the following entry: 277 | 278 | ``` 279 | value;expression;result 280 | UK0000001444555;equals;Assets:Jim:Current 281 | ``` 282 | 283 | This entry will match the `account` index value (`UK0000001444555`) and assign the origin account as `Assets:Jim:Current` in your Beancount ledger. 284 | If no match is found, the rule will default to `Assets:Unknown`. 285 | 286 | #### Handling Missing `account` Index 287 | 288 | If the CSV file does not contain an `account` index, you can specify the account directly in the configuration file by using the `account` property: 289 | 290 | ```yaml 291 | --- !Config 292 | ... 293 | rules: 294 | account: UK0000001444555 295 | ``` 296 | 297 | This will assign the account `Assets:Jim:Current` to all transactions in the CSV file, regardless of the actual account value in the CSV. 298 | 299 | Alternatively, you can set the `origin_account` property in the `rules` block and skip this rule completely. 300 | 301 | ```yaml 302 | --- !Config 303 | ... 304 | rules: 305 | origin_account: Assets:Jim:Current 306 | ``` 307 |
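Whichever way the origin account is resolved (via `asset.rules` or via `origin_account`), the imported entry ends up looking roughly like the following sketch; the `csv` and `md5` metadata lines are omitted here, and the second posting falls back to the configured `default_expense` account when no expense rule matches:

```
2020-11-04 * "Fresh Food Inc." ""
  Assets:Jim:Current -21.30 EUR
  Expenses:Unknown
```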
350 | 351 | #### Set_Accounts 352 | 353 | The `Set_Accounts` sets both the **origin** and **destination** account for a given transaction, based on one or more values of a given CSV index. 354 | This rule is useful for transactions like ATM withdrawals, where both accounts need to be defined. 355 | 356 | As an example, consider the following CSV transaction representing an ATM withdrawal: 357 | 358 | ``` 359 | 01.12.2020;01.11.2020;Cash Withdrawal;Bank Of Holland;-100;EUR;0000001;UK0000001444555 360 | ``` 361 | 362 | In this case, we want to set the **origin** account to `Assets:Jim:Current` and the **destination** account to `Assets:Jim:Cash`. 363 | 364 | The `Set_Accounts` rule can be configured as follows: 365 | 366 | ``` 367 | - name: Set_Accounts 368 | from: Assets:Jim:Current 369 | to: Assets:Jim:Cash 370 | csv_index: 2 371 | csv_values: Cash Withdrawal 372 | ``` 373 | 374 | 375 | - The rule points to `csv_index: 2`, which refers to the third column in the CSV (indexing starts from 0). 376 | - If the value at index 2 matches `Cash Withdrawal`, the origin account is set to `Assets:Jim:Current` and the destination account is set to `Assets:Jim:Cash`. 377 | 378 | The `Set_Accounts` rule supports multiple `csv_values` separated by a semicolon (`;`). 379 | If any of the specified values match, the rule is applied. 380 | For example, if you want the rule to apply to different forms of "withdrawal" in multiple languages: 381 | 382 | 383 | ``` 384 | - name: Set_Accounts 385 | from: Assets:Jim:Current 386 | to: Assets:Jim:Cash 387 | csv_index: 2 388 | csv_values: Cash Withdrawal;*Retiro*;*Ritiro* 389 | ``` 390 | 391 | - The `csv_values` are case-insensitive. 392 | - Wildcards are supported using `fnmatch`. In the example above, 393 | the wildcard * is used to match any string that contains `Retiro` or `Ritiro`. 394 | 395 | #### Ignore_By_Payee 396 | 397 | The `Ignore_By_Payee` rule can be used to ignore transactions based on the value of the `counterparty` index in a CSV file. 398 | This is useful when you want to exclude specific transactions from being imported into the ledger. 399 | 400 | 401 | Suppose you want to ignore any transactions where the counterparty is "Mc Donald" or "Best Shoes". You can configure the rule as follows: 402 | 403 | ``` 404 | - name: Ignore_By_Payee 405 | ignore_payee: 406 | - Mc Donald 407 | - Best Shoes 408 | ``` 409 | 410 | The names of counterparties in the `ignore_payee` list are case-insensitive. This means both "Mc Donald" and "mc donald" would be matched and ignored. 411 | 412 | 413 | #### Ignore_By_StringAtPos 414 | 415 | The `Ignore_By_StringAtPos` rule allows you to ignore a transaction based on the value found at a specific index in the CSV file. This is useful for filtering out transactions that meet specific criteria in a particular column. 416 | 417 | ### Example 418 | 419 | To ignore transactions where the value in index 4 (fifth column) matches `abc0102`, configure the rule like this: 420 | 421 | ```yaml 422 | - name: Ignore_By_StringAtPos 423 | ignore_string_at_pos: 424 | - abc0102;4 425 | ``` 426 | - The index in the CSV file starts from `0`, so `4` refers to the fifth column. 427 | - The values specified in `ignore_string_at_pos` are case-insensitive, meaning `abc0102` and `ABC0102` would both be matched and ignored.
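To see how the individual rules fit together, here is a sketch of a `rules` block that combines several of the rules described above. It assumes that plain rule names and parameterized entries (`- name: ...`) can be mixed in the same `ruleset` list; all account names, CSV positions, and counterparty strings below are illustrative:

```yaml
rules:
  beancount_file: 'main.ldg'
  ruleset:
    - Replace_Payee
    - Replace_Expense
    - name: Set_Accounts
      from: Assets:Jim:Current
      to: Assets:Jim:Cash
      csv_index: 2
      csv_values: Cash Withdrawal
    - name: Ignore_By_Payee
      ignore_payee:
        - Best Shoes
    - name: Ignore_By_StringAtPos
      ignore_string_at_pos:
        - abc0102;4
```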
428 | 429 | ### Custom rules 430 | 431 | TODO 432 | 433 | ### Advanced Duplicate Detection 434 | 438 | Beanborg includes a robust duplicate detection mechanism to prevent importing the same transaction multiple times. This method works by hashing the transaction data from the CSV file and associating the resulting hash with the ledger entry using [transaction metadata](https://beancount.github.io/docs/beancount_language_syntax.html#metadata). 439 | 440 | 441 | #### Basic Duplicate Detection 442 | 443 | When a transaction is imported, Beanborg generates a hash of the CSV data. For example, consider the following CSV entry: 444 | 445 | ``` 446 | 2019-03-17,2019-03-18,Überweisung,nick sammy,-520,00,IT389328932723787832,Personal,E-d3be986080315683eee5efbeb297243a,Gebucht,Privat 447 | ``` 448 | 449 | 450 | The corresponding hash (`2454abe7257b2b40dfa9e5d24b6e16e7`) is stored in the ledger's metadata under the `md5` key. 451 | If you attempt to import the same CSV row again, Beanborg detects that the hash already exists and rejects the transaction, preventing duplicates. 452 | 453 | #### Handling Inconsistent Data 454 | 455 | In practice, banks may modify transaction details in the CSV file after the first export. For example, consider the following modified entry: 456 | 457 | ``` 458 | 2019-03-17,2019-03-18,Überweisung,Nick Sammy,-520,00,IT389328932723787832,Personal,E-d3be986080315683eee5efbeb297243a,Gebucht,Privat 459 | ``` 460 | 461 | In this case, the payee’s name has changed from `nick sammy` to `Nick Sammy`. Since this small variation alters the transaction's hash, Beanborg would treat it as a different entry, bypassing the basic duplicate detection mechanism. 462 | 463 | To address these inconsistencies, Beanborg implements a secondary, advanced duplicate detection system. In addition to hashing the transaction, it checks if a transaction with the **same date and amount** already exists in the ledger for the current account. If a potential duplicate is found, Beanborg prompts the user to confirm whether the transaction should be imported. 464 | 465 | The advanced duplicate detection can be disabled by setting the `advanced_duplicate_detection` option to `false` in the account’s configuration file, allowing Beanborg to rely solely on hash-based detection. 466 | 467 | ```yaml 468 | rules: 469 | advanced_duplicate_detection: false 470 | ``` 471 | 472 | ### Machine Learning-Based Transaction Categorization 473 | 474 | Beanborg integrates an advanced Machine Learning (ML) mechanism to automatically categorize transactions when rule-based categorization is not possible. This system ensures that transactions are accurately classified by leveraging both machine learning and, optionally, the ChatGPT API. 475 | 476 | 477 | #### How It Works 478 | 479 | When Beanborg is unable to categorize a transaction through its predefined rules, it invokes an ML model trained on historical data to predict the most likely categories. This provides an additional layer of automation to reduce the need for manual intervention. 480 | 481 | - **Top Predictions**: The system generates up to three category predictions using the ML model.
These predictions are displayed to the user, who can select one of the suggested categories or manually assign a category if none of the suggestions are appropriate. 482 | 483 | - **Optional GPT Integration**: If enabled, a fourth prediction is provided by querying the ChatGPT API, offering an AI-based suggestion that complements the ML model's predictions. 484 | 485 | #### Prediction Workflow 486 | 487 | The categorization workflow follows a structured process: 488 | 489 | 1. **Transaction Evaluation**: If no rule matches a transaction, Beanborg invokes the ML model to generate category predictions. 490 | 2. **Top 3 ML Predictions**: The system displays the three most likely categories for the transaction based on the training dataset and the features extracted. 491 | 3. **User Interaction**: The user can choose one of the three ML-generated categories or manually assign a category if the predictions are not suitable. 492 | 4. **Optional GPT Suggestion**: If enabled, a fourth prediction generated by the ChatGPT API is displayed, offering an alternative suggestion. 493 | 5. **Dynamic Learning**: The system updates the training dataset based on the user's final choice, enabling continuous model improvement. 494 | 495 | #### Enabling the ChatGPT API predictions 496 | 497 | To enable the optional ChatGPT API-based prediction, follow these steps: 498 | 499 | 1. Set the `OPENAI_API_KEY` environment variable with your OpenAI API key. 500 | 2. Update the configuration file to activate the feature by setting the `rules.use_llm` property to `true`. 501 | 502 | Your configuration should look like this: 503 | 504 | ```yaml 505 | rules: 506 | use_llm: true 507 | ``` 508 | 509 | With these settings enabled, Beanborg will include an additional category prediction generated by the ChatGPT API alongside the machine learning model’s top predictions. 
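For example, assuming a POSIX shell and a placeholder key value, the variable can be exported before running the import stage:

```
export OPENAI_API_KEY="sk-..."
bb_import -f ~/config/wells-fargo.yaml
```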
510 | 511 | -------------------------------------------------------------------------------- /beanborg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/__init__.py -------------------------------------------------------------------------------- /beanborg/arg_parser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def eval_args(help_message): 5 | 6 | parser = argparse.ArgumentParser(description=help_message) 7 | 8 | parser.add_argument( 9 | "-f", 10 | "--file", 11 | help="Configuration file to load", 12 | required=True, 13 | ) 14 | 15 | parser.add_argument( 16 | "-v", "--debug", required=False, default=False, action="store_true" 17 | ) 18 | 19 | parser.add_argument( 20 | "--fix-only", 21 | required=False, 22 | default=False, 23 | action="store_true", 24 | help="Only fix transactions without an account", 25 | ) 26 | 27 | args = parser.parse_args() 28 | return args 29 | -------------------------------------------------------------------------------- /beanborg/bb_archive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | __copyright__ = "Copyright (C) 2023 Luciano Fiandesio" 5 | __license__ = "GNU GPLv2" 6 | 7 | import csv 8 | import os 9 | import shutil 10 | import sys 11 | from datetime import datetime 12 | 13 | from rich import print as rprint 14 | 15 | from beanborg.arg_parser import eval_args 16 | from beanborg.config import init_config 17 | 18 | 19 | def main(): 20 | 21 | args = eval_args("Archives imported CVS file") 22 | config = init_config(args.file, args.debug) 23 | 24 | target_csv = os.path.join(config.csv.target, config.csv.ref + ".csv") 25 | 26 | if not os.path.isfile(target_csv): 27 | rprint(f"[red]file: {target_csv} does not exist![red]") 28 | sys.exit(-1) 29 | 30 | if not os.path.isdir(config.csv.archive): 31 | os.mkdir(config.csv.archive) 32 | 33 | dates = [] 34 | print("\u2713" + " detecting start and end date of transaction file...") 35 | with open(target_csv) as csv_file: 36 | csv_reader = csv.reader(csv_file, delimiter=config.csv.separator) 37 | for _ in range(config.csv.skip): 38 | next(csv_reader) # skip the line 39 | 40 | for row in csv_reader: 41 | try: 42 | dates.append( 43 | datetime.strptime( 44 | row[config.indexes.date].strip(), config.csv.date_format 45 | ) 46 | ) 47 | except Exception as ex: 48 | print("error: " + str(ex)) 49 | 50 | print("\u2713" + " moving file to archive...") 51 | os.rename( 52 | target_csv, 53 | config.csv.archive 54 | + "/" 55 | + config.csv.ref 56 | + "_" 57 | + str(min(dates).date()) 58 | + "_" 59 | + str(max(dates).date()) 60 | + ".csv", 61 | ) 62 | 63 | print("\u2713" + " removing temp folder") 64 | shutil.rmtree(config.csv.target) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /beanborg/bb_import.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | __copyright__ = "Copyright (C) 2024 Luciano Fiandesio" 5 | __license__ = "GNU GPLv2" 6 | 7 | 8 | from beanborg.importer import Importer 9 | 10 | 11 | def main(): 12 | imp = Importer() 13 | imp.import_transactions() 14 | 15 | 16 | if __name__ == "__main__": 17 | main() 18 | 
-------------------------------------------------------------------------------- /beanborg/bb_mover.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | __copyright__ = "Copyright (C) 2023 Luciano Fiandesio" 5 | __license__ = "GNU GPLv2" 6 | 7 | import glob 8 | import os 9 | import shutil 10 | import sys 11 | from subprocess import CalledProcessError, check_call 12 | 13 | from rich import print as rprint 14 | 15 | from beanborg.arg_parser import eval_args 16 | from beanborg.config import init_config 17 | 18 | 19 | def main(): 20 | 21 | args = eval_args("Move bank csv file to processing folder") 22 | config = init_config(args.file, args.debug) 23 | current_dir = os.getcwd() 24 | # support path like ~/Downloads 25 | path = os.path.expanduser(config.csv.download_path) 26 | if not os.path.isdir(path): 27 | rprint(f"[red]folder: {config.csv.download_path} does not exist![/red]") 28 | sys.exit(-1) 29 | 30 | if not os.path.isdir(config.csv.target): 31 | os.mkdir(config.csv.target) 32 | 33 | # count number of files starting with: 34 | file_count = len(glob.glob1(path, config.csv.name + "*")) 35 | 36 | if file_count > 1: 37 | print( 38 | "more than one file starting with % s found in %s. \ 39 | Can not continue ." 40 | % (config.csv.name, config.csv.download_path) 41 | ) 42 | sys.exit(-1) 43 | 44 | if file_count == 0: 45 | rprint( 46 | f"[red]No file found in [bold]{config.csv.download_path}[/bold] " 47 | f"with name starting with: [bold]{config.csv.name}[/bold][/red]" 48 | ) 49 | sys.exit(-1) 50 | 51 | if config.csv.post_script_path and not os.path.isfile(config.csv.post_script_path): 52 | print("No post-move script found: %s" % (config.csv.post_script_path)) 53 | sys.exit(-1) 54 | 55 | for f in os.listdir(path): 56 | if f.startswith(config.csv.name): 57 | src = os.path.join(path, f) 58 | moved_csv = os.path.join(config.csv.target, config.csv.ref + ".csv") 59 | if config.csv.keep_original: 60 | shutil.copy(src, moved_csv) 61 | else: 62 | os.rename(src, moved_csv) 63 | 64 | if config.csv.post_script_path: 65 | try: 66 | check_call( 67 | [ 68 | config.csv.post_script_path, 69 | os.path.join(current_dir, moved_csv), 70 | ] 71 | ) 72 | except CalledProcessError as e: 73 | rprint( 74 | "[red]An error occurred executing: %s\n%s[/red]" 75 | % (config.csv.post_script_path, str(e)) 76 | ) 77 | print("Done :) ") 78 | 79 | 80 | if __name__ == "__main__": 81 | main() 82 | -------------------------------------------------------------------------------- /beanborg/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/classification/__init__.py -------------------------------------------------------------------------------- /beanborg/classification/classifier.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pandas as pd 4 | from beancount.core.data import Posting 5 | from prompt_toolkit import prompt 6 | from prompt_toolkit.key_binding import KeyBindings 7 | from prompt_toolkit.keys import Keys 8 | from rich import print 9 | from rich.prompt import Confirm 10 | 11 | from beanborg.classification.custom_fuzzy_wordf_completer import ( 12 | CustomFuzzyWordCompleter, 13 | ) 14 | from beanborg.classification.data_loader import DataLoader 15 | from beanborg.classification.gpt_service 
import GPTService 16 | from beanborg.classification.transaction_model import TransactionModel 17 | from beanborg.classification.ui_service import UIService 18 | from beanborg.utils.journal_utils import JournalUtils 19 | from beanborg.utils.string_utils import StringUtils 20 | 21 | 22 | class Classifier: 23 | 24 | def __init__(self, data="training_data.csv", use_llm=False, bc_file=None): 25 | self.trainingDataFile = data 26 | self.use_llm = use_llm 27 | self.bc_file = bc_file 28 | self.training_data = DataLoader.load_data(self.trainingDataFile) 29 | try: 30 | self.model = TransactionModel(self.training_data, data) 31 | except Exception as e: 32 | print(f"Error initializing TransactionModel: {e}") 33 | self.model = None 34 | 35 | self.gpt_service = GPTService(self.use_llm) 36 | self.ui_service = UIService() 37 | 38 | def has_no_category(self, tx, args) -> bool: 39 | return tx.postings[1].account == args.rules.default_expense 40 | 41 | def get_day_of_month(self, date): 42 | return pd.to_datetime(date).day 43 | 44 | def get_day_of_week(self, date): 45 | return pd.to_datetime(date).dayofweek 46 | 47 | def get_predictions(self, text, day_of_month, day_of_week): 48 | 49 | if self.model is None: 50 | return [], [], self.get_llm_prediction(text) 51 | 52 | # Use the TransactionModel for predictions 53 | top_labels, top_probs = self.model.predict(text, day_of_month, day_of_week) 54 | 55 | alternative_label = self.get_llm_prediction(text) 56 | 57 | return top_labels, top_probs, alternative_label 58 | 59 | def confirm_classification(self, txs, args): 60 | return Confirm.ask( 61 | f"\n[red]You have [bold]{txs.count_no_category(args.rules.default_expense)}[/bold] " 62 | f"transactions without category, do you want to fix them now?[/red]" 63 | ) 64 | 65 | def get_llm_prediction(self, text): 66 | 67 | if self.use_llm: 68 | # This function queries the GPT service for a label prediction based on the provided text. 69 | # It uses the available accounts from the journal to help the GPT service make a more informed prediction. 70 | # If the GPT service is not available, it returns None. 
71 | accounts = JournalUtils().get_accounts(self.bc_file) 72 | alternative_label = self.gpt_service.query_gpt_for_label(text, accounts) 73 | else: 74 | alternative_label = None 75 | 76 | return alternative_label 77 | 78 | def process_transaction(self, tx, index, txs, args): 79 | stripped_text = StringUtils.strip_digits(tx.payee.upper()) 80 | day_of_month = self.get_day_of_month(tx.date) 81 | day_of_week = self.get_day_of_week(tx.date) 82 | 83 | top_labels, top_probs, chatgpt_prediction = self.get_predictions( 84 | stripped_text, day_of_month, day_of_week 85 | ) 86 | self.ui_service.display_transaction( 87 | tx, top_labels, top_probs, chatgpt_prediction 88 | ) 89 | 90 | selected_category = self.get_user_selection( 91 | top_labels, chatgpt_prediction, args 92 | ) 93 | if selected_category is None: 94 | return "quit" 95 | elif selected_category: 96 | narration = self.get_user_narration() 97 | self.update_transaction(tx, index, txs, selected_category, narration) 98 | amount = tx.postings[0].units.number 99 | if selected_category != args.rules.default_expense: 100 | if self.model is not None: 101 | self.model.update_training_data( 102 | tx.date, 103 | stripped_text, 104 | amount, 105 | selected_category, 106 | day_of_month, 107 | day_of_week, 108 | ) 109 | else: 110 | row = pd.DataFrame( 111 | { 112 | "date": [tx.date], 113 | "desc": [stripped_text], 114 | "amount": [amount], 115 | "cat": [selected_category], 116 | } 117 | ) 118 | DataLoader.add_training_row(self, self.trainingDataFile, row) 119 | 120 | return "continue" 121 | 122 | def get_user_narration(self): 123 | narration = input( 124 | "Enter a comment for the transaction (press Enter to skip): " 125 | ).strip() 126 | return narration if narration else None 127 | 128 | def get_user_selection(self, top_labels, chatgpt_prediction, args): 129 | options = len(top_labels) + (1 if chatgpt_prediction else 0) 130 | if options == 0: 131 | return self.handle_custom_input(args) 132 | 133 | while True: 134 | selected_number = input( 135 | f"Enter your selection (1-{options}, or 'Enter' to choose the category, 'q' to quit): " 136 | ) 137 | if selected_number.lower() == "q": 138 | return None 139 | if selected_number.isdigit(): 140 | return self.handle_numeric_selection( 141 | int(selected_number), top_labels, chatgpt_prediction 142 | ) 143 | return self.handle_custom_input(args) 144 | 145 | def handle_numeric_selection(self, selected_number, top_labels, chatgpt_prediction): 146 | if chatgpt_prediction and selected_number == len(top_labels) + 1: 147 | return chatgpt_prediction 148 | elif 1 <= selected_number <= len(top_labels): 149 | return top_labels[selected_number - 1] 150 | return None 151 | 152 | def handle_custom_input(self, args): 153 | accounts = JournalUtils().get_accounts(args.rules.bc_file) 154 | account_completer = CustomFuzzyWordCompleter(accounts) 155 | kb = self.create_key_bindings() 156 | selected_category = prompt( 157 | "Enter account: ", 158 | completer=account_completer, 159 | complete_while_typing=True, 160 | key_bindings=kb, 161 | default=args.rules.default_expense, 162 | ) 163 | if selected_category not in accounts: 164 | print( 165 | "[bold red]Invalid account. 
Please select a valid account.[/bold red]" 166 | ) 167 | return self.handle_custom_input(args) 168 | return selected_category 169 | 170 | def create_key_bindings(self): 171 | kb = KeyBindings() 172 | 173 | @kb.add(Keys.Backspace) 174 | def _(event): 175 | event.current_buffer.delete_before_cursor(count=1) 176 | event.current_buffer.start_completion(select_first=False) 177 | 178 | return kb 179 | 180 | def update_transaction(self, tx, index, txs, category, narration=None): 181 | posting = Posting(category, None, None, None, None, None) 182 | new_postings = [tx.postings[0]] + [posting] 183 | new_tx = tx._replace(postings=new_postings) 184 | if narration: 185 | new_tx = new_tx._replace(narration=narration) 186 | txs.getTransactions()[index] = new_tx 187 | 188 | def classify(self, txs, args): 189 | if not self.confirm_classification(txs, args): 190 | return 191 | 192 | for i, tx in enumerate(txs.getTransactions()): 193 | if self.has_no_category(tx, args): 194 | result = self.process_transaction(tx, i, txs, args) 195 | if result == "quit": 196 | break 197 | -------------------------------------------------------------------------------- /beanborg/classification/custom_fuzzy_wordf_completer.py: -------------------------------------------------------------------------------- 1 | from prompt_toolkit.completion import Completion, FuzzyWordCompleter 2 | 3 | 4 | class CustomFuzzyWordCompleter(FuzzyWordCompleter): 5 | def get_completions(self, document, complete_event): 6 | word_before_cursor = document.get_word_before_cursor(WORD=True) 7 | for word in self.words: 8 | if word.lower().startswith(word_before_cursor.lower()): 9 | yield Completion(word, start_position=-len(word_before_cursor)) 10 | -------------------------------------------------------------------------------- /beanborg/classification/data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | 5 | 6 | class DataLoader: 7 | @staticmethod 8 | def load_data(filepath: str) -> pd.DataFrame: 9 | 10 | expanded_filepath = os.path.expanduser(filepath) 11 | if not os.path.exists(expanded_filepath): 12 | os.makedirs(os.path.dirname(expanded_filepath), exist_ok=True) 13 | with open(expanded_filepath, "w") as f: 14 | f.write("date,desc,amount,cat\n") 15 | 16 | data = pd.read_csv(filepath) 17 | data["date"] = pd.to_datetime(data["date"], format="%Y-%m-%d") 18 | data["day_of_month"] = pd.to_datetime(data["date"], errors="coerce").dt.day 19 | data["day_of_week"] = pd.to_datetime(data["date"], errors="coerce").dt.dayofweek 20 | data["desc"] = data["desc"].astype(str) 21 | return data 22 | 23 | @staticmethod 24 | def add_training_row(self, filepath: str, row: pd.Series): 25 | expanded_filepath = os.path.expanduser(filepath) 26 | if os.path.exists(expanded_filepath): 27 | data = pd.read_csv(filepath) 28 | data = pd.concat([data, row], ignore_index=True) 29 | data.to_csv(filepath, index=False) 30 | -------------------------------------------------------------------------------- /beanborg/classification/gpt_service.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from openai import AuthenticationError, OpenAI 4 | 5 | 6 | class GPTService: 7 | def __init__(self, use_llm: bool): 8 | if use_llm: 9 | try: 10 | self.client = OpenAI() 11 | # Test the API key by making a simple request 12 | self.client.models.list() 13 | except AuthenticationError: 14 | self.client = None 15 | print("OpenAI API key is invalid or 
not set.") 16 | except Exception as e: 17 | self.client = None 18 | print(f"Failed to initialize OpenAI client: {str(e)}") 19 | 20 | def query_gpt_for_label(self, description: str, labels: List[str]) -> str: 21 | if not self.client: 22 | return "OpenAI not available" 23 | 24 | try: 25 | response = self.client.chat.completions.create( 26 | model="gpt-4", 27 | messages=[ 28 | { 29 | "role": "system", 30 | "content": "You are 'TransactionBud' a helpful and concise utility designed to categorize bank transactions efficiently. Your primary function is to assign a category to each transaction presented to you", 31 | }, 32 | { 33 | "role": "user", 34 | "content": f"Given the description '{description}', what would be the most appropriate category among the following: {', '.join(labels)}? Only output the category name without any additional text.", 35 | }, 36 | ], 37 | temperature=0.7, 38 | top_p=1, 39 | ) 40 | return response.choices[0].message.content 41 | except Exception as e: 42 | print(f"Failed to query GPT: {str(e)}") 43 | return "OpenAI not available" 44 | -------------------------------------------------------------------------------- /beanborg/classification/transaction_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from imblearn.over_sampling import SMOTE 6 | from imblearn.pipeline import Pipeline as ImbPipeline 7 | from imblearn.pipeline import make_pipeline 8 | from sklearn.compose import ColumnTransformer 9 | from sklearn.feature_extraction.text import CountVectorizer 10 | from sklearn.neighbors import KNeighborsClassifier 11 | from sklearn.preprocessing import LabelEncoder, StandardScaler 12 | 13 | 14 | class TransactionModel: 15 | def __init__(self, training_data, data_file): 16 | self.training_data = training_data 17 | self.data_file = data_file 18 | self._create_and_fit_model() 19 | 20 | def _remove_single_sample_classes(self, X, y): 21 | class_counts = y.value_counts() 22 | classes_to_keep = class_counts[class_counts >= 2].index 23 | mask = y.isin(classes_to_keep) 24 | return X[mask], y[mask] 25 | 26 | def _create_and_fit_model(self): 27 | X = self.training_data[["desc", "day_of_month", "day_of_week"]] 28 | y = self.training_data["cat"] 29 | 30 | # Remove classes with only one sample 31 | X, y = self._remove_single_sample_classes(X, y) 32 | 33 | # Encode target labels 34 | self.label_encoder = LabelEncoder() 35 | y_encoded = self.label_encoder.fit_transform(y) 36 | 37 | # Create feature processing pipeline 38 | feature_pipeline = ColumnTransformer( 39 | [ 40 | ( 41 | "text", 42 | make_pipeline( 43 | CountVectorizer(analyzer=str.split), # Removed token_pattern 44 | StandardScaler(with_mean=False), 45 | ), 46 | "desc", 47 | ), 48 | ("num", StandardScaler(), ["day_of_month", "day_of_week"]), 49 | ] 50 | ) 51 | 52 | # Create KNN classifier 53 | n_neighbors = min(5, len(y) - 1) 54 | knn = KNeighborsClassifier(n_neighbors=n_neighbors) 55 | 56 | # Create pipeline with SMOTE 57 | self.model = ImbPipeline( 58 | [ 59 | ("features", feature_pipeline), 60 | ("smote", SMOTE(k_neighbors=min(5, min(y.value_counts()) - 1))), 61 | ("classifier", knn), 62 | ] 63 | ) 64 | 65 | # Fit the model 66 | self.model.fit(X, y_encoded) 67 | 68 | def predict(self, text, day_of_month, day_of_week, n=3): 69 | # Create a DataFrame for the input text with the same structure as the training data 70 | data = { 71 | "desc": [text], 72 | "day_of_month": [day_of_month], 73 | "day_of_week": [day_of_week], 
74 | } 75 | input_df = pd.DataFrame(data) 76 | 77 | # Predict the probabilities for the input DataFrame 78 | probs = self.model.predict_proba(input_df) 79 | 80 | # Get the indices of the top n probabilities 81 | top_indices = np.argsort(probs[0])[-n:][::-1] 82 | 83 | # Map indices to class labels and probabilities 84 | top_classes = self.label_encoder.classes_[top_indices] 85 | top_probabilities = probs[0][top_indices] 86 | 87 | return top_classes, top_probabilities 88 | 89 | def update_training_data( 90 | self, date, description, amount, category, day_of_month, day_of_week 91 | ): 92 | """ 93 | Updates the training data with a new or existing entry and retrains the model. 94 | """ 95 | 96 | tokenized_description = self._tokenize_description(description) 97 | 98 | # Check if the description already exists 99 | existing_entry = self.training_data[ 100 | self.training_data["desc"] == tokenized_description 101 | ] 102 | 103 | if not existing_entry.empty: 104 | existing_category = existing_entry["cat"].iloc[0] 105 | 106 | if existing_category != category: 107 | # Conflict found: Ask user how to handle the conflicting category 108 | self._handle_existing_entry_conflict( 109 | tokenized_description, 110 | existing_category, 111 | date, 112 | amount, 113 | category, 114 | day_of_month, 115 | day_of_week, 116 | ) 117 | else: 118 | # Entry already exists with the same category, no update needed 119 | print( 120 | f"Entry already exists with category '{existing_category}'. Skipping update." 121 | ) 122 | return 123 | else: 124 | # Add a new entry 125 | self._add_new_entry( 126 | date, tokenized_description, amount, category, day_of_month, day_of_week 127 | ) 128 | print(f"New entry added: '{description}' with category '{category}'.") 129 | 130 | # Append the new data to the CSV file instead of rewriting it entirely 131 | self._append_to_csv( 132 | date, tokenized_description, amount, category, day_of_month, day_of_week 133 | ) 134 | 135 | self._create_and_fit_model() 136 | 137 | def _append_to_csv( 138 | self, date, description, amount, category, day_of_month, day_of_week 139 | ): 140 | """ 141 | Append the new entry to the CSV file without overwriting the whole file. 142 | Ensures a newline is present before appending the new entry. 143 | """ 144 | new_data = pd.DataFrame( 145 | { 146 | "date": [date], 147 | "desc": [description], 148 | "amount": [amount], 149 | "cat": [category], 150 | } 151 | ) 152 | 153 | # Check if the file already exists 154 | file_exists = os.path.isfile(self.data_file) 155 | 156 | # Ensure there's a newline at the end of the file before appending new data 157 | if file_exists: 158 | with open(self.data_file, "rb+") as f: 159 | f.seek(-1, os.SEEK_END) # Move to the last byte 160 | last_char = f.read(1) 161 | if last_char != b"\n": # Check if the last character is a newline 162 | f.write(b"\n") # If not, add a newline 163 | 164 | # Now append the new data 165 | new_data.to_csv(self.data_file, mode="a", header=False, index=False) 166 | 167 | def _handle_existing_entry_conflict( 168 | self, 169 | description, 170 | existing_category, 171 | date, 172 | amount, 173 | new_category, 174 | day_of_month, 175 | day_of_week, 176 | ): 177 | """ 178 | Handle the case where an entry with the same description exists but has a different category. 179 | Allows the user to choose between updating, adding a new entry, or skipping. 180 | """ 181 | print( 182 | f"Description '{description}' already exists with category '{existing_category}'." 
183 | ) 184 | action = input( 185 | "Choose action:\n" 186 | "1. Update existing entry\n" 187 | "2. Add new entry\n" 188 | "3. Skip update\n" 189 | "Enter choice (1/2/3): " 190 | ) 191 | 192 | if action == "1": 193 | # Update the existing entry with the new category 194 | self.training_data.loc[self.training_data["desc"] == description, "cat"] = ( 195 | new_category 196 | ) 197 | print( 198 | f"Existing entry for '{description}' updated to category '{new_category}'." 199 | ) 200 | elif action == "2": 201 | # Add a new entry despite the conflict 202 | self._add_new_entry( 203 | date, description, amount, new_category, day_of_month, day_of_week 204 | ) 205 | print( 206 | f"New entry added for '{description}' with category '{new_category}'." 207 | ) 208 | else: 209 | # Skip the update process 210 | print("Update skipped.") 211 | 212 | def _add_new_entry( 213 | self, date, description, amount, category, day_of_month, day_of_week 214 | ): 215 | """ 216 | Add a new entry to the training data. 217 | """ 218 | new_data = pd.DataFrame( 219 | { 220 | "date": [date], 221 | "desc": [description], 222 | "amount": [amount], 223 | "cat": [category], 224 | "day_of_month": [day_of_month], 225 | "day_of_week": [day_of_week], 226 | } 227 | ) 228 | self.training_data = pd.concat( 229 | [self.training_data, new_data], ignore_index=True 230 | ) 231 | 232 | def _tokenize_description(self, description): 233 | """ 234 | Tokenize the description using CountVectorizer. 235 | """ 236 | vectorizer = CountVectorizer(analyzer=str.split) 237 | tokens = vectorizer.build_analyzer()(description) 238 | return " ".join(tokens) 239 | -------------------------------------------------------------------------------- /beanborg/classification/ui_service.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from beancount.parser.printer import format_entry 4 | from rich import box 5 | from rich.console import Console 6 | from rich.panel import Panel 7 | from rich.syntax import Syntax 8 | 9 | 10 | class UIService: 11 | @staticmethod 12 | def display_transaction( 13 | tx, top_labels: List[str], top_probs: List[float], chatgpt_prediction: str 14 | ): 15 | console = Console() 16 | console.clear() 17 | 18 | # Convert the transaction to a string and apply syntax highlighting 19 | tx_str = format_entry(tx) 20 | highlighted_tx = Syntax(tx_str, "python", theme="monokai", line_numbers=False) 21 | 22 | tx_panel = Panel( 23 | highlighted_tx, 24 | title="Transaction", 25 | width=80, 26 | expand=False, 27 | border_style="cyan", 28 | box=box.ROUNDED, 29 | ) 30 | 31 | predictions_content = ["Top 3 predictions:"] 32 | for i, (label, prob) in enumerate(zip(top_labels, top_probs), 1): 33 | predictions_content.append( 34 | f"[bold cyan]{i}.[/] [cyan]{label}[/] ({prob:.2f})" 35 | ) 36 | if chatgpt_prediction: 37 | predictions_content.append( 38 | f"[bold cyan]{len(top_labels) + 1}.[/] ChatGPT: [cyan]{chatgpt_prediction}[/]" 39 | ) 40 | 41 | console.print(tx_panel) 42 | 43 | # Only print the predictions panel if there are predictions to show 44 | if predictions_content and (len(predictions_content) > 1 or chatgpt_prediction): 45 | pred_panel = Panel( 46 | "\n".join(predictions_content), 47 | title="Predictions", 48 | width=80, 49 | expand=False, 50 | border_style="magenta", 51 | box=box.ROUNDED, 52 | ) 53 | console.print(pred_panel) 54 | -------------------------------------------------------------------------------- /beanborg/config.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import yaml 5 | 6 | 7 | class Rules: 8 | def __init__( 9 | self, 10 | bc_file=None, 11 | rules_folder=None, 12 | account=None, 13 | currency=None, 14 | default_expense=None, 15 | force_negative=None, 16 | invert_negative=None, 17 | origin_account=None, 18 | ruleset=[], 19 | advanced_duplicate_detection=None, 20 | training_data=None, 21 | use_llm=None, 22 | ): 23 | self.bc_file = bc_file 24 | self.rules_folder = rules_folder 25 | self.account = account 26 | self.currency = currency 27 | self.default_expense = default_expense 28 | self.force_negative = force_negative 29 | self.invert_negative = invert_negative 30 | self.origin_account = origin_account 31 | self.ruleset = ruleset 32 | self.advanced_duplicate_detection = advanced_duplicate_detection 33 | self.training_data = training_data 34 | self.use_llm = use_llm 35 | 36 | 37 | class Indexes: 38 | def __init__( 39 | self, 40 | date=None, 41 | counterparty=None, 42 | amount=None, 43 | account=None, 44 | currency=None, 45 | tx_type=None, 46 | amount_in=None, 47 | narration=None, 48 | ): 49 | self.date = date 50 | self.counterparty = counterparty 51 | self.amount = amount 52 | self.account = account 53 | self.currency = currency 54 | self.tx_type = tx_type 55 | self.amount_in = amount_in 56 | self.narration = narration 57 | 58 | 59 | class Csv: 60 | def __init__( 61 | self, 62 | download_path, 63 | name, 64 | ref, 65 | separator=None, 66 | date_format=None, 67 | skip=None, 68 | target=None, 69 | archive=None, 70 | post_script_path=None, 71 | keep_original=None, 72 | ): 73 | self.download_path = download_path 74 | self.name = name 75 | self.ref = ref 76 | self.separator = separator 77 | self.date_format = date_format 78 | self.skip = skip 79 | self.target = target 80 | self.archive = archive 81 | self.post_script_path = post_script_path 82 | self.keep_original = keep_original 83 | 84 | 85 | class Config: 86 | def __init__(self, csv, indexes, rules, debug=False): 87 | self.csv = csv 88 | self.indexes = indexes 89 | self.rules = rules 90 | self.debug = debug 91 | 92 | def load(loader, node): 93 | values = loader.construct_mapping(node, deep=True) 94 | 95 | csv_data = values["csv"] 96 | 97 | csv = Csv( 98 | csv_data["download_path"], 99 | csv_data["name"], 100 | csv_data["bank_ref"], 101 | csv_data.get("separator", ","), 102 | csv_data["date_format"], 103 | csv_data.get("skip", 1), 104 | csv_data.get("target", "tmp"), 105 | csv_data.get("archive_path", "archive"), 106 | csv_data.get("post_move_script"), 107 | csv_data.get("keep_original", False), 108 | ) 109 | 110 | idx = values.get("indexes", dict()) 111 | 112 | indexes = Indexes( 113 | idx.get("date", 0), 114 | idx.get("counterparty", 3), 115 | idx.get("amount", 4), 116 | idx.get("account", 1), 117 | idx.get("currency", 5), 118 | idx.get("tx_type", 2), 119 | idx.get("amount_in", None), 120 | idx.get("narration", None), 121 | ) 122 | 123 | rls = values.get("rules", dict()) 124 | 125 | rules = Rules( 126 | rls.get("beancount_file", "main.ldg"), 127 | rls.get("rules_folder", "rules"), 128 | rls.get("account", None), 129 | rls.get("currency", None), 130 | rls.get("default_expense", "Expenses:Unknown"), 131 | rls.get("force_negative", False), 132 | rls.get("invert_negative", False), 133 | rls.get("origin_account", None), 134 | rls.get("ruleset", []), 135 | rls.get("advanced_duplicate_detection", True), 136 | rls.get("training_data", "training_data.csv"), 137 | rls.get("use_llm", 
False), 138 | ) 139 | 140 | return Config(csv, indexes, rules) 141 | 142 | 143 | def init_config(file, debug): 144 | 145 | yaml.add_constructor("!Config", Config.load) 146 | 147 | if not os.path.isfile(file): 148 | print("file: %s does not exist!" % (file)) 149 | sys.exit(-1) 150 | 151 | with open(file, "r") as file: 152 | try: 153 | config = yaml.load(file, Loader=yaml.FullLoader) 154 | except yaml.scanner.ScannerError: 155 | print("file: %s is malformed, please check" % (file.name)) 156 | sys.exit(-1) 157 | 158 | config.debug = debug 159 | return config 160 | -------------------------------------------------------------------------------- /beanborg/handlers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/handlers/__init__.py -------------------------------------------------------------------------------- /beanborg/handlers/amount_handler.py: -------------------------------------------------------------------------------- 1 | __copyright__ = "Copyright (C) 2022 Luciano Fiandesio" 2 | __license__ = "GNU GPLv2" 3 | 4 | from beancount.core.number import D 5 | 6 | 7 | class AmountHandler: 8 | 9 | # create mapping tables for currency conversion 10 | sign_trans = str.maketrans({"$": "", " ": ""}) # remove $ and space 11 | dot_trans = str.maketrans({".": "", ",": ""}) # remove . and , 12 | 13 | def handle(self, val, args): 14 | 15 | if args.indexes.amount_in: 16 | return self.__convert(val[args.indexes.amount_in].strip()) - self.__convert( 17 | val 18 | ) 19 | 20 | if args.rules.invert_negative and val[0] == "-": 21 | val = val.replace("-", "+") 22 | 23 | if args.rules.force_negative == 1 and val[0].isdigit(): 24 | val = "-" + val 25 | 26 | return self.__convert(val) 27 | 28 | def __convert(self, num, sign_trans=sign_trans, dot_trans=dot_trans): 29 | """ 30 | Converts the given string into a decimal, where the last 31 | two digits are always assumed to be the decimals: 32 | 33 | "22 000,76" -> 22000.76 34 | "22.000,76" -> 22000.76 35 | "22,000.76" -> 22000.76 36 | "1022000,76" -> 1022000.76 37 | "-1,022,000.76", -> -1022000.76 38 | "1022000", -> 1022000.0 39 | "22 000,76$", -> 22000.76 40 | "$22 000,76" -> 22000.76 41 | 42 | """ 43 | 44 | num = num.translate(sign_trans) 45 | num = num[:-3].translate(dot_trans) + num[-3:] 46 | return D(num.replace(",", ".")) 47 | -------------------------------------------------------------------------------- /beanborg/importer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import csv 3 | import os 4 | import re 5 | import sys 6 | import traceback 7 | from dataclasses import dataclass 8 | from datetime import datetime, timedelta 9 | from random import SystemRandom 10 | 11 | from beancount.core.data import Amount 12 | from beancount.parser.printer import format_entry 13 | from rich import print as rprint 14 | from rich.table import Table 15 | 16 | from beanborg.arg_parser import eval_args 17 | from beanborg.classification.classifier import Classifier 18 | from beanborg.config import init_config 19 | from beanborg.handlers.amount_handler import AmountHandler 20 | from beanborg.model.transactions import Transactions 21 | from beanborg.rule_engine.Context import Context 22 | from beanborg.rule_engine.rules_engine import RuleEngine 23 | from beanborg.utils.duplicate_detector import ( 24 | hash_tuple, 25 | init_duplication_store, 26 | 
print_duplication_warning, 27 | to_tuple, 28 | ) 29 | from beanborg.utils.hash_utils import hash 30 | from beanborg.utils.journal_utils import JournalUtils 31 | 32 | 33 | @dataclass 34 | class ImportStats: 35 | tx_in_file: int = 0 36 | processed: int = 0 37 | error: int = 0 38 | no_category: int = 0 39 | hash_collision: int = 0 40 | ignored_by_rule: int = 0 41 | skipped_by_user: int = 0 42 | 43 | 44 | class Importer: 45 | """ 46 | Initialize the import rule engine using the arguments from 47 | the configuration file 48 | """ 49 | 50 | def debug(self): 51 | """check if the importer is started using the debug flag 52 | 53 | Returns: 54 | boolean: is debug 55 | """ 56 | return self.args.debug 57 | 58 | def log_error(self, row): 59 | """simple error logger""" 60 | print(f'CSV: {",".join(row)}') 61 | rprint("-" * 80) 62 | 63 | def __init__(self): 64 | self.stats = ImportStats() 65 | self.args = None 66 | self.accounts = set() 67 | self.txs = Transactions({}) 68 | 69 | def gen_datetime(self, min_year=1900, max_year=datetime.now().year): 70 | """generate a datetime in format yyyy-mm-dd hh:mm:ss.000000""" 71 | start = datetime(min_year, 1, 1, 00, 00, 00) 72 | years = max_year - min_year + 1 73 | end = start + timedelta(days=365 * years) 74 | return start + (end - start) * SystemRandom.random(self) 75 | 76 | def init_rule_engine(self): 77 | """ 78 | Initialize the import rule engine using the arguments from 79 | the configuration file 80 | """ 81 | 82 | folder = self.args.rules.rules_folder 83 | 84 | if ( 85 | len(self.args.rules.ruleset) > 1 86 | and not os.path.isfile(folder + "/asset.rules") 87 | and self.args.rules.account is None 88 | and self.args.rules.origin_account is None 89 | ): 90 | 91 | rprint( 92 | "[red]Please specify an account in your config file " 93 | "or create an entry in the asset.rules file[/red]" 94 | ) 95 | sys.exit(-1) 96 | 97 | return RuleEngine( 98 | Context( 99 | date_fomat=self.args.csv.date_format, 100 | default_expense=self.args.rules.default_expense, 101 | date_pos=self.args.indexes.date, 102 | payee_pos=self.args.indexes.counterparty, 103 | tx_type_pos=self.args.indexes.tx_type, 104 | account_pos=self.args.indexes.account, 105 | narration_pos=self.args.indexes.narration, 106 | account=self.args.rules.account, 107 | ruleset=self.args.rules.ruleset, 108 | rules_dir=folder, 109 | force_account=self.args.rules.origin_account, 110 | debug=self.args.debug, 111 | ) 112 | ) 113 | 114 | def print_summary(self): 115 | table = Table(title="Import Summary") 116 | table.add_column("Counter", style="magenta") 117 | table.add_column("Value", style="green", justify="right") 118 | table.add_row("csv tx count", str(self.stats.tx_in_file)) 119 | table.add_row("imported", str(self.stats.processed)) 120 | table.add_row("tx already present", str(self.stats.hash_collision)) 121 | table.add_row("tx ignored by rule", str(self.stats.ignored_by_rule)) 122 | table.add_row("tx skipped by user", str(self.stats.skipped_by_user)) 123 | 124 | if self.stats.error > 0: 125 | table.add_row("error", str(self.stats.error), style="red") 126 | else: 127 | table.add_row("error", str(self.stats.error)) 128 | table.add_row("tx without category", str(self.stats.no_category)) 129 | print("\n") 130 | rprint(table) 131 | 132 | def get_account(self, row): 133 | """get the account value for the given csv line 134 | or use the specified account 135 | """ 136 | if self.args.rules.account: 137 | return self.args.rules.account 138 | 139 | return row[self.args.indexes.account] 140 | 141 | def get_currency(self, row): 
142 | """get the currency value for the given csv line or 143 | use the specified currency 144 | """ 145 | if self.args.rules.currency: 146 | return self.args.rules.currency 147 | return row[self.args.indexes.currency] 148 | 149 | def warn_hash_collision(self, row, md5): 150 | rprint( 151 | "[red]warning[/red]: " 152 | "a transaction with identical hash exists in " 153 | "the journal: " 154 | f"[bold]{md5}[/bold]" 155 | ) 156 | self.log_error(row) 157 | self.stats.hash_collision += 1 158 | 159 | def fetch_account_transactions(self, account): 160 | 161 | account_file = account + ".ldg" 162 | account_tx = ( 163 | init_duplication_store(account_file, self.args.rules.bc_file) 164 | if self.args.rules.advanced_duplicate_detection 165 | else {} 166 | ) 167 | return account_tx 168 | 169 | def verify_accounts_count(self): 170 | if len(self.accounts) > 1 and len(self.transactions) > 0: 171 | rprint( 172 | "[red]Expecting only one account in csv" 173 | f"file, found: {str(len(self.accounts))}[/red]" 174 | ) 175 | 176 | def verify_unique_transactions(self, account): 177 | 178 | account_txs = self.fetch_account_transactions(account) 179 | pre_trans = [] 180 | for key in sorted(self.txs.getTransactions()): 181 | # check if the transaction being imported matches another 182 | # existing transaction 183 | # in the current ledger file. 184 | tup = to_tuple(self.txs.getTransactions()[key]) 185 | if hash_tuple(tup) in account_txs: 186 | if print_duplication_warning(account_txs[hash_tuple(tup)]): 187 | pre_trans.append(self.txs[key]) 188 | else: 189 | pre_trans.append(self.txs.getTransactions()[key]) 190 | 191 | return Transactions(pre_trans) 192 | 193 | def write_tx(self, file_handler, tx): 194 | file_handler.write(format_entry(tx) + "\n") 195 | 196 | def write_to_ledger(self, account_file, transactions): 197 | 198 | with open(account_file, "a") as exc: 199 | for tx in transactions: 200 | self.write_tx(exc, tx) 201 | 202 | def fix_uncategorized_tx(self): 203 | """ 204 | Fix uncategorized transactions in the ledger file. 
205 | """ 206 | 207 | # Get target account 208 | account = self.args.rules.account 209 | txs = JournalUtils().get_transactions_by_account_name( 210 | self.args.rules.bc_file, account 211 | ) 212 | # Get the filename of the first transaction 213 | filename = txs[0].meta["filename"] 214 | 215 | # filter out txs that have already been categorized 216 | txs = Transactions( 217 | [ 218 | tx 219 | for tx in txs 220 | if tx.postings[1].account == self.args.rules.default_expense 221 | ] 222 | ) 223 | Classifier( 224 | self.args.rules.training_data, 225 | self.args.rules.use_llm, 226 | self.args.rules.bc_file, 227 | ).classify(txs, self.args) 228 | 229 | with open(filename, "r") as file: 230 | content = file.read() 231 | for tx in txs.getTransactions(): 232 | self.update_transaction( 233 | content, filename, tx.meta["md5"], tx.postings[1].account 234 | ) 235 | 236 | def update_transaction(self, ledger_content, ledger_file, md5, new_category): 237 | 238 | # Find the transaction block with the given md5 239 | pattern = rf'(.*?md5: "{md5}".*?Expenses:Unknown.*?\n\n)' 240 | match = re.search(pattern, ledger_content, re.DOTALL) 241 | 242 | if match: 243 | transaction_block = match.group(1) 244 | 245 | # Replace 'Expenses:Unknown' with the new category 246 | updated_block = re.sub( 247 | r"( Expenses:Unknown)", f" {new_category}", transaction_block 248 | ) 249 | 250 | # Replace the old block with the updated one 251 | updated_content = ledger_content.replace(transaction_block, updated_block) 252 | 253 | # Write the updated content back to the file 254 | with open(ledger_file, "w") as file: 255 | file.write(updated_content) 256 | else: 257 | print(f"Skipping transaction with md5 {md5} not found.") 258 | 259 | def import_transactions(self): 260 | 261 | options = eval_args("Parse bank csv file and import into beancount") 262 | self.args = init_config(options.file, options.debug) 263 | 264 | if options.fix_only: 265 | self.fix_uncategorized_tx() 266 | return 267 | 268 | # transactions csv file to import 269 | import_csv = os.path.join(self.args.csv.target, f"{self.args.csv.ref}.csv") 270 | 271 | if not os.path.isfile(import_csv): 272 | rprint("[red]file: %s does not exist![red]" % (import_csv)) 273 | sys.exit(-1) 274 | 275 | rule_engine = self.init_rule_engine() 276 | tx_hashes = JournalUtils().transaction_hashes(self.args.rules.bc_file) 277 | 278 | with open(import_csv) as csv_file: 279 | csv_reader = csv.reader(csv_file, delimiter=self.args.csv.separator) 280 | for _ in range(self.args.csv.skip): 281 | next(csv_reader) # skip the line 282 | for row in csv_reader: 283 | self.stats.tx_in_file += 1 284 | try: 285 | # calculate hash of csv row 286 | md5 = hash(row) 287 | 288 | # keep track of the accounts for each tx: 289 | # the system expects one account per imported file 290 | res_account = self.get_account(row) 291 | if self.debug(): 292 | print("resolved account: " + str(res_account)) 293 | self.accounts.add(res_account) 294 | 295 | if md5 not in tx_hashes: 296 | self.process_tx(row, md5, rule_engine) 297 | else: 298 | self.warn_hash_collision(row, md5) 299 | 300 | except Exception as e: 301 | print("error: " + str(e)) 302 | self.log_error(row) 303 | self.stats.error += 1 304 | if self.debug(): 305 | traceback.print_exc() 306 | 307 | self.verify_accounts_count() 308 | working_account = self.accounts.pop() 309 | filtered_txs = self.verify_unique_transactions(working_account) 310 | 311 | self.stats.skipped_by_user = self.txs.count() - filtered_txs.count() 312 | self.stats.processed = filtered_txs.count() 
313 | 314 | if filtered_txs.count_no_category(self.args.rules.default_expense) > 0: 315 | Classifier( 316 | self.args.rules.training_data, 317 | self.args.rules.use_llm, 318 | self.args.rules.bc_file, 319 | ).classify(filtered_txs, self.args) 320 | 321 | # write transactions to file 322 | account_file = working_account + ".ldg" 323 | self.write_to_ledger(account_file, filtered_txs.getTransactions()) 324 | self.print_summary() 325 | 326 | def validate(self, tx): 327 | """ 328 | Handle the origin account: if the tx processed by the 329 | rules engin has no origin account, try to assign one 330 | from the property file: args.rules.origin_account 331 | """ 332 | if tx.postings[0].account is None: 333 | raise Exception( 334 | "Unable to resolve the origin account for this transaction, " 335 | "please check that the `Replace_Asset` rule " 336 | "is in use for this account or set the " 337 | " `origin_account` property " 338 | "in the config file." 339 | ) 340 | 341 | return tx 342 | 343 | def enrich(self, row, tx, tx_date, md5): 344 | 345 | tx_meta = {"csv": ",".join(row), "md5": md5} 346 | 347 | # replace date """ 348 | tx = tx._replace(date=str(tx_date.date())) 349 | 350 | # add md5 and csv """ 351 | tx = tx._replace(meta=tx_meta) 352 | 353 | # get a decimal, with the minus sign, 354 | # if it's an expense 355 | amount = AmountHandler().handle( 356 | row[self.args.indexes.amount].strip(), self.args 357 | ) 358 | # add units (how much was spent) 359 | new_posting = tx.postings[0]._replace( 360 | units=Amount(amount, self.get_currency(row)) 361 | ) 362 | tx = tx._replace(postings=[new_posting] + [tx.postings[1]]) 363 | 364 | # add narration 365 | if self.args.indexes.narration: 366 | tx = tx._replace(narration=row[self.args.indexes.narration].strip()) 367 | 368 | if self.debug(): 369 | print(tx) 370 | 371 | return tx 372 | 373 | def process_tx(self, row, md5, rule_engine): 374 | 375 | tx = rule_engine.execute(row) 376 | 377 | if tx: 378 | # check if the a category is assigned 379 | if tx.postings[1].account == self.args.rules.default_expense: 380 | self.stats.no_category += 1 381 | 382 | tx_date = datetime.strptime( 383 | row[self.args.indexes.date].strip(), self.args.csv.date_format 384 | ) 385 | 386 | tx = self.validate(self.enrich(row, tx, tx_date, md5)) 387 | 388 | # generate a key based on: 389 | # - the tx date 390 | # - a random time (tx time is not important, but date is!) 
391 | key = str(tx_date) + str(self.gen_datetime().time()) 392 | self.txs.getTransactions()[key] = tx 393 | 394 | else: 395 | self.stats.ignored_by_rule += 1 396 | -------------------------------------------------------------------------------- /beanborg/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/model/__init__.py -------------------------------------------------------------------------------- /beanborg/model/transactions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | class Transactions: 5 | __transactions = {} 6 | 7 | def __init__(self, transactions): 8 | self.__transactions = transactions 9 | 10 | def count_no_category(self, default_expense) -> int: 11 | txs = [] 12 | for tx in self.__transactions: 13 | if tx.postings[1].account == default_expense: 14 | txs.append(tx) 15 | 16 | return len(txs) 17 | 18 | def count(self) -> int: 19 | return len(self.__transactions) 20 | 21 | def getTransactions(self): 22 | return self.__transactions 23 | -------------------------------------------------------------------------------- /beanborg/rule_engine/Context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from dataclasses import dataclass 4 | 5 | 6 | @dataclass 7 | class Context: 8 | # ruleset 9 | ruleset: [] 10 | # custom rules folder 11 | rules_dir: str 12 | # the date format used in the CSV file 13 | date_fomat: str 14 | # the default account (Expense) to use for a the second "leg" of a 15 | # transaction 16 | default_expense: str 17 | # the index of the date field in the csv file 18 | date_pos: int 19 | # # the index of the counterparty field in the csv file 20 | payee_pos: int 21 | # the index of the transaction type field in the csv file 22 | tx_type_pos: int 23 | # the index of the account id field in the csv file 24 | account_pos: int 25 | # the index of the narration field in the csv file 26 | narration_pos: int 27 | # if the CSV file has no account id, use "account" to lookup the Account 28 | # Origin when using the Replace_Asset rule 29 | account: str 30 | # Force the Account Origin to the value specifed 31 | force_account: str 32 | # Output debug info 33 | debug: bool 34 | -------------------------------------------------------------------------------- /beanborg/rule_engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/rule_engine/__init__.py -------------------------------------------------------------------------------- /beanborg/rule_engine/decision_tables.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import csv 4 | import os 5 | 6 | 7 | def init_decision_table(file, debug=False): 8 | table = {} 9 | tablefile = os.path.join(os.getcwd(), file) 10 | if not os.path.isfile(tablefile) or os.stat(file).st_size == 0: 11 | if debug: 12 | print("The decision table file: " + file + " is missing or empty.") 13 | else: 14 | with open(tablefile) as csv_file: 15 | csv_reader = csv.reader(decomment(csv_file), delimiter=";") 16 | next(csv_reader) # skip first line 17 | for row in csv_reader: 18 | if any(row): 19 | if len(row) == 3: 20 | table[row[0]] = 
(row[1], row[2]) 21 | else: 22 | print("invalid rule: " + ", ".join(row)) 23 | return table 24 | 25 | 26 | def decomment(csvfile): 27 | for row in csvfile: 28 | raw = row.split("#")[0].strip() 29 | if raw: 30 | yield row 31 | 32 | 33 | def resolve_from_decision_table(table, string, default): 34 | 35 | eq_check_func = { 36 | "equals": _equals, 37 | "equals_ic": _equals_ignore_case, 38 | "startsWith": _startsWith, 39 | "endsWith": _endsWith, 40 | "contains": _contains, 41 | "contains_ic": _contains_ignore_case, 42 | "eq": _equals, 43 | "sw": _startsWith, 44 | "ew": _endsWith, 45 | "co": _contains, 46 | } 47 | for k in table.keys(): 48 | t = table[k] 49 | eq_check_type = t[0] 50 | # TODO: do not fail if string (equals, contains, etc does not match) 51 | if eq_check_func.get(eq_check_type)(string, k): 52 | return t[1] 53 | 54 | return default 55 | 56 | 57 | def _equals(string_a, string_b): 58 | return string_a == string_b 59 | 60 | 61 | def _equals_ignore_case(string_a, string_b): 62 | return string_a.casefold() == string_b.casefold() 63 | 64 | 65 | def _startsWith(string_a, string_b): 66 | return string_a.startswith(string_b) 67 | 68 | 69 | def _endsWith(string_a, string_b): 70 | return string_a.endswith(string_b) 71 | 72 | 73 | def _contains(string_a, string_b): 74 | return string_b in string_a 75 | 76 | 77 | def _contains_ignore_case(string_a, string_b): 78 | return string_b.casefold() in string_a.casefold() 79 | -------------------------------------------------------------------------------- /beanborg/rule_engine/rules.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import abc 4 | import fnmatch 5 | import os 6 | import sys 7 | 8 | from beancount.core.data import Posting 9 | 10 | from .Context import Context 11 | from .decision_tables import init_decision_table, resolve_from_decision_table 12 | 13 | 14 | class LookUpCache: 15 | """ 16 | Simple cache for lookup tables 17 | """ 18 | 19 | cache = dict() 20 | 21 | @staticmethod 22 | def init_decision_table(key, path): 23 | 24 | if key in LookUpCache.cache: 25 | return LookUpCache.cache[key] 26 | 27 | data = init_decision_table(path) 28 | LookUpCache.cache[key] = data 29 | return data 30 | 31 | 32 | class Rule: 33 | __metaclass__ = abc.ABCMeta 34 | 35 | def __init__(self, name: str, context: Context): 36 | self.name = name 37 | self.context = context 38 | 39 | @abc.abstractmethod 40 | def execute(self, csv_line, transaction=None, ruleDef=None): 41 | 42 | return 43 | 44 | def checkAccountFromTo(self, ruleDef): 45 | if ruleDef.get("from") is None or ruleDef.get("to") is None: 46 | raise Exception( 47 | "Account from and to required for rule: {rule}".format( 48 | rule=ruleDef.rule.__name__ 49 | ) 50 | ) 51 | 52 | def failIfAttributeMissing(self, ruleDef, attributeName): 53 | if ruleDef.get(attributeName) is None: 54 | raise Exception( 55 | "Attribute {attribute_name} required for rule: {rule} ".format( 56 | attribute_name=attributeName, rule=ruleDef.rule.__name__ 57 | ) 58 | ) 59 | 60 | 61 | class Set_Accounts(Rule): 62 | """ 63 | Assign a from/to asset or account to a transaction, depending on the 64 | value of a given cvs index. 65 | 66 | Rule attributes: 67 | name: rule name (Set_Accounts) 68 | from: asset or account 69 | to: asset or account 70 | csv_index: csv row index to analyze (base-0) 71 | csv_values: semicolon delimited list of strings. 72 | If any of the values matches the 73 | value at the csv row's index, the from/to values 74 | are assigned. 
75 | The string evaluation is case insensitive. 76 | 77 | Example: 78 | - name: Set_Accounts 79 | from: Assets:Bank1:Bob:Savings 80 | to: Account:Groceries 81 | csv_index: 4 82 | csv_values: superfood;super_food; 83 | 84 | """ 85 | 86 | def __init__(self, name, context): 87 | Rule.__init__(self, name, context) 88 | 89 | def execute(self, csv_line, tx, ruleDef=None): 90 | 91 | # current value at index for the current row 92 | # csv_field_val = csv_line[ruleDef.csv_index].lower() 93 | csv_field_val = csv_line[ruleDef.get("csv_index")].lower().strip() 94 | 95 | # values specified in the rule definition 96 | vals = ruleDef.get("csv_values").split(";") 97 | 98 | match = False 99 | for val in vals: 100 | # Use fnmatch to allow wildcard matching 101 | if fnmatch.fnmatch(csv_field_val, val.lower().strip()): 102 | match = True 103 | break 104 | 105 | if match: 106 | newPosting = [ 107 | Posting( 108 | account=ruleDef.get("from"), 109 | units=None, 110 | cost=None, 111 | price=None, 112 | flag=None, 113 | meta=None, 114 | ), 115 | Posting( 116 | account=ruleDef.get("to"), 117 | units=None, 118 | cost=None, 119 | price=None, 120 | flag=None, 121 | meta=None, 122 | ), 123 | ] 124 | 125 | return (True, tx._replace(postings=newPosting)) 126 | 127 | return (False, tx) 128 | 129 | 130 | class Replace_Payee(Rule): 131 | """ 132 | Replaces the name of the transaction counterparty 133 | (for instance: McDonald -> Mc Donald Restaurant) 134 | The rule file containing the substitution rules 135 | must be located in the rules folder and must be named "payee.rules" 136 | """ 137 | 138 | def __init__(self, name, context): 139 | Rule.__init__(self, name, context) 140 | 141 | def execute(self, csv_line, tx, ruleDef=None): 142 | table = os.path.join(self.context.rules_dir, "payee.rules") 143 | if not os.path.isfile(table): 144 | print( 145 | "file: %s does not exist! - The 'Replace_Payee' rules \ 146 | requires the payee.rules file." 147 | % (table) 148 | ) 149 | sys.exit(-1) 150 | 151 | return ( 152 | False, 153 | tx._replace( 154 | payee=resolve_from_decision_table( 155 | LookUpCache.init_decision_table("payee", table), 156 | csv_line[self.context.payee_pos], 157 | csv_line[self.context.payee_pos], 158 | ) 159 | ), 160 | ) 161 | 162 | 163 | class Replace_Asset(Rule): 164 | """ 165 | Assigns an account to a transaction, based on value of the 'account' index 166 | of a CSV file row. 167 | This rule is useful to assign the correct source account 168 | of a CSV transaction. 169 | 170 | The rule is based on the 'asset.rules' look-up file. 171 | If no 'asset.rules' file is found, the account 172 | will be resolved to "Assets:Unknown" or 173 | to the value of the property `rules.origin_account` of the config file. 174 | """ 175 | 176 | def __init__(self, name, context): 177 | Rule.__init__(self, name, context) 178 | 179 | def execute(self, csv_line, tx=None, ruleDef=None): 180 | 181 | asset = None 182 | table = os.path.join(self.context.rules_dir, "asset.rules") 183 | if self.context.force_account: 184 | asset = self.context.force_account 185 | else: 186 | if not os.path.isfile(table): 187 | print( 188 | "file: %s does not exist! - \ 189 | The 'Replace_Asset' rules requires the asset.rules \ 190 | file." 
191 | % (table) 192 | ) 193 | sys.exit(-1) 194 | 195 | asset = resolve_from_decision_table( 196 | LookUpCache.init_decision_table("asset", table), 197 | ( 198 | self.context.account 199 | if self.context.account is not None 200 | else csv_line[self.context.account_pos] 201 | ), 202 | "Assets:Unknown", 203 | ) 204 | 205 | if asset: 206 | posting = Posting(asset, None, None, None, None, None) 207 | new_postings = [posting] + [tx.postings[1]] 208 | return (False, tx._replace(postings=new_postings)) 209 | 210 | return (False, tx) 211 | 212 | 213 | class Replace_Expense(Rule): 214 | """ 215 | Categorizes a transaction by assigning the account 216 | extracted from a look-up table 217 | based on the 'payee_pos' index of a CSV file row. 218 | 219 | The rule is based on the 'payee.rules' look-up file. 220 | """ 221 | 222 | def __init__(self, name, context): 223 | Rule.__init__(self, name, context) 224 | 225 | def execute(self, csv_line, tx=None, ruleDef=None): 226 | table = os.path.join(self.context.rules_dir, "account.rules") 227 | 228 | if not os.path.isfile(table): 229 | print( 230 | "file: % s does not exist! - The 'Replace_Expense' rules \ 231 | requires the account.rules file." 232 | % (table) 233 | ) 234 | sys.exit(-1) 235 | 236 | expense = resolve_from_decision_table( 237 | LookUpCache.init_decision_table("account", table), 238 | csv_line[self.context.payee_pos], 239 | self.context.default_expense, 240 | ) 241 | if expense: 242 | posting = Posting(expense, None, None, None, None, None) 243 | new_postings = [tx.postings[0]] + [posting] 244 | return (False, tx._replace(postings=new_postings)) 245 | 246 | return (False, tx) 247 | 248 | 249 | class Ignore_By_Payee(Rule): 250 | def __init__(self, name, context): 251 | Rule.__init__(self, name, context) 252 | 253 | def execute(self, csv_line, tx=None, ruleDef=None): 254 | 255 | self.failIfAttributeMissing(ruleDef, "ignore_payee") 256 | for ignorablePayee in ruleDef.get("ignore_payee"): 257 | if ignorablePayee.lower() in csv_line[self.context.payee_pos].lower(): 258 | return (True, None) 259 | 260 | return (False, tx) 261 | 262 | 263 | class Ignore_By_StringAtPos(Rule): 264 | """ 265 | Ignores a transaction based on the value of the specified index. 266 | 267 | For instance, given this csv entry: 268 | 269 | 10.12.2022,bp-fuel,20US$ 270 | 271 | and this rule: 272 | 273 | - name: Ignore_By_ContainsStringAtPos 274 | ignore_string_at_pos: 275 | - bp-fuel;1 276 | 277 | The row will be ignored, because the string "bp-fuel" matches 278 | the index at position 1. 279 | 280 | Example: 281 | - name: Ignore_By_StringAtPos 282 | ignore_string_at_pos: 283 | - val;3 284 | """ 285 | 286 | def __init__(self, name, context): 287 | Rule.__init__(self, name, context) 288 | 289 | def execute(self, csv_line, tx=None, ruleDef=None): 290 | 291 | self.failIfAttributeMissing(ruleDef, "ignore_string_at_pos") 292 | for ignorable in ruleDef.get("ignore_string_at_pos"): 293 | pos = int(ignorable.split(";")[1]) 294 | strToIgnore = ignorable.split(";")[0] 295 | 296 | if strToIgnore.lower().strip() == csv_line[pos].lower().strip(): 297 | return (True, None) 298 | 299 | return (False, tx) 300 | 301 | 302 | class Ignore_By_ContainsStringAtPos(Rule): 303 | """ 304 | Ignores a transaction if the specified value is present 305 | in the specified index. 
306 | For instance, given this csv entry: 307 | 308 | 10.12.2022,mega supermarket,20US$ 309 | 310 | and this rule: 311 | 312 | - name: Ignore_By_ContainsStringAtPos 313 | ignore_string_at_pos: 314 | - mega;1 315 | 316 | The row will be ignored, because the string "mega" is part of 317 | the index at position 1. 318 | 319 | Note that this rule supports multiple string specifications. 320 | 321 | Example: 322 | - name: Ignore_By_ContainsStringAtPos 323 | ignore_string_at_pos: 324 | - val;3 325 | - another val;6 326 | """ 327 | 328 | def __init__(self, name, context): 329 | Rule.__init__(self, name, context) 330 | 331 | def execute(self, csv_line, tx=None, ruleDef=None): 332 | 333 | self.failIfAttributeMissing(ruleDef, "ignore_string_contains_at_pos") 334 | 335 | for ignorable in ruleDef.get("ignore_string_contains_at_pos"): 336 | pos = int(ignorable.split(";")[1]) 337 | strToIgnore = ignorable.split(";")[0] 338 | if strToIgnore.lower() in csv_line[pos].lower(): 339 | return (True, None) 340 | -------------------------------------------------------------------------------- /beanborg/rule_engine/rules_engine.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import fnmatch 4 | import os 5 | import sys 6 | import uuid 7 | from dataclasses import dataclass 8 | from typing import Dict, List 9 | 10 | from beancount.core.data import Posting, Transaction 11 | 12 | from .Context import Context 13 | from .rules import * 14 | 15 | __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) 16 | 17 | 18 | @dataclass 19 | class RuleDef: 20 | rule: str 21 | attributes: Dict[str, List[str]] 22 | 23 | def get(self, key): 24 | return self.attributes[key] 25 | 26 | 27 | class Rule_Init(Rule): 28 | def __init__(self, name, context): 29 | Rule.__init__(self, name, context) 30 | 31 | def execute(self, csv_line, transaction=None): 32 | 33 | return ( 34 | False, 35 | Transaction( 36 | meta=None, 37 | date=None, 38 | flag="*", 39 | payee=None, 40 | narration=None, 41 | tags=None, 42 | links=None, 43 | postings=[ 44 | Posting( 45 | account=None, 46 | units=None, 47 | cost=None, 48 | price=None, 49 | flag=None, 50 | meta=None, 51 | ), 52 | Posting( 53 | account=None, 54 | units=None, 55 | cost=None, 56 | price=None, 57 | flag=None, 58 | meta=None, 59 | ), 60 | ], 61 | ), 62 | ) 63 | 64 | 65 | class RuleEngine: 66 | 67 | def handle(self, cr): 68 | 69 | return cr 70 | 71 | def __init__(self, ctx: Context): 72 | 73 | self._ctx = ctx 74 | self.rules = {} 75 | 76 | custom_rules = self.load_custom_rules() 77 | 78 | if self._ctx.ruleset is None: 79 | print( 80 | "\u26A0" 81 | + " no rules file spefified for this financial \ 82 | institution" 83 | ) 84 | self.rules = {} 85 | else: 86 | for yrule in self._ctx.ruleset: 87 | rule_props = {} 88 | for key in yrule: 89 | if key == "name": 90 | rule_name = yrule["name"] 91 | else: 92 | rule_props[key] = yrule.get(key) 93 | 94 | if rule_name in custom_rules: 95 | self.rules[rule_name] = RuleDef(custom_rules[rule_name], rule_props) 96 | else: 97 | unique_rule_name = rule_name + "|" + uuid.uuid4().hex.upper()[0:6] 98 | self.rules[unique_rule_name] = RuleDef( 99 | globals()[rule_name], rule_props 100 | ) 101 | # assign default rules, if they are not already specified 102 | if ctx.rules_dir and not self.is_rule_in_list("Replace_Asset"): 103 | self.rules["Replace_Asset"] = RuleDef(globals()["Replace_Asset"], None) 104 | 105 | def is_rule_in_list(self, name): 106 | for rule_name in self.rules: 
107 | if rule_name.startswith(name): 108 | return True 109 | 110 | return False 111 | 112 | def load_custom_rules(self): 113 | 114 | custom_rulez = {} 115 | if self._ctx.rules_dir is not None: 116 | custom_rules_path = os.path.join(os.getcwd(), self._ctx.rules_dir) 117 | if not os.path.isdir(custom_rules_path): 118 | if self._ctx.debug: 119 | print("Custom rules folder not found...ignoring") 120 | return custom_rulez 121 | sys.path.append(custom_rules_path) 122 | custom_rules = fnmatch.filter(os.listdir(custom_rules_path), "*.py") 123 | for r in custom_rules: 124 | mod_name = r[:-3] 125 | mod = __import__(mod_name, globals={}) 126 | class_ = getattr(mod, mod_name) 127 | # TODO check if custom rule is of type rule before adding 128 | custom_rulez[mod_name] = class_ 129 | 130 | return custom_rulez 131 | 132 | def execute(self, csv_line): 133 | 134 | final, tx = Rule_Init("init", self._ctx).execute(csv_line) 135 | 136 | for key in self.rules: 137 | if not final: 138 | if self._ctx.debug: 139 | print("Executing rule: " + str(self.rules[key].rule)) 140 | rulez = self.rules[key].rule(key, self._ctx) 141 | final, tx = rulez.execute(csv_line, tx, self.rules[key]) 142 | 143 | return tx 144 | -------------------------------------------------------------------------------- /beanborg/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/utils/__init__.py -------------------------------------------------------------------------------- /beanborg/utils/duplicate_detector.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | from beancount import loader 4 | from beancount.core.data import Transaction 5 | from rich import print as rprint 6 | from rich.prompt import Confirm 7 | 8 | 9 | def hash_tuple(tuple): 10 | 11 | m = hashlib.md5() 12 | for s in tuple: 13 | m.update(str(s).encode("utf-8")) 14 | return m.hexdigest() 15 | 16 | 17 | def to_tuple(transaction): 18 | 19 | return (str(transaction.date), transaction.postings[0].units) 20 | 21 | 22 | def init_duplication_store(account, journal): 23 | """ 24 | Builds a map of existing transactions for the account being imported. 25 | Each map entry has an hash of the value as key and a tuple of 26 | transaction date and amount value. 27 | This map is used to report identical transactions being imported, 28 | should the standard hash based approach fail. 29 | """ 30 | transactions = {} 31 | entries, _, _ = loader.load_file(journal) 32 | for entry in entries: 33 | if isinstance(entry, Transaction) and entry.meta["filename"].endswith(account): 34 | tup = to_tuple(entry) 35 | transactions[hash_tuple(tup)] = tup 36 | 37 | return transactions 38 | 39 | 40 | def print_duplication_warning(tx): 41 | 42 | rprint( 43 | "[red]Warning[/red]: a transaction with identical date and" 44 | " amount already exists in the ledger. 
" 45 | f"\ndate: [bold]{tx[0]}[/bold]\namount [bold]{tx[1]}[/bold]" 46 | ) 47 | return Confirm.ask("Do you want to import it?") 48 | -------------------------------------------------------------------------------- /beanborg/utils/hash_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import hashlib 4 | 5 | 6 | def hash(csv_row): 7 | 8 | return hashlib.md5(",".join(csv_row).encode("utf-8")).hexdigest() 9 | -------------------------------------------------------------------------------- /beanborg/utils/journal_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from beancount import loader 3 | from beancount.core.data import Transaction 4 | from beancount.core.getters import get_accounts 5 | 6 | 7 | class JournalUtils: 8 | 9 | def get_entries(self, journal): 10 | """ 11 | Load in-memory all the entries of the provided ledger. 12 | """ 13 | entries, _, _ = loader.load_file(journal) 14 | return entries 15 | 16 | def transaction_hashes(self, journal): 17 | """ 18 | Load in-memory all the hashes (md5 property) of the provided ledger. 19 | This is required for the duplication detecting algo 20 | """ 21 | 22 | md5s = [] 23 | entries = self.get_entries(journal) 24 | for entry in entries: 25 | if isinstance(entry, Transaction): 26 | md5 = entry.meta.get("md5", "") 27 | if md5: 28 | md5s.append(md5) 29 | return md5s 30 | 31 | def get_accounts(self, journal): 32 | 33 | return get_accounts(self.get_entries(journal)) 34 | 35 | def get_transactions_by_account_name(self, journal, account): 36 | """ 37 | Get all transactions for a given account name. 38 | """ 39 | entries = self.get_entries(journal) 40 | txs = [] 41 | for entry in entries: 42 | if isinstance(entry, Transaction): 43 | if str(entry.meta["filename"]).endswith(f"{account}.ldg"): 44 | txs.append(entry) 45 | return txs 46 | -------------------------------------------------------------------------------- /beanborg/utils/string_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re 3 | 4 | 5 | class StringUtils: 6 | 7 | def strip_digits(str): 8 | # return ''.join([c for c in str if not c.isdigit()]) 9 | return re.sub("[^A-Z ]", "", str) 10 | -------------------------------------------------------------------------------- /bin/bb_archive: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | from beanborg import bb_archive 6 | if __name__ == '__main__': 7 | sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 8 | sys.exit(bb_archive.main()) -------------------------------------------------------------------------------- /bin/bb_import: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | from beanborg import bb_import 6 | if __name__ == '__main__': 7 | sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 8 | sys.exit(bb_import.main()) 9 | -------------------------------------------------------------------------------- /bin/bb_mover: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | from beanborg import bb_mover 6 | if __name__ == '__main__': 7 | sys.argv[0] = 
re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 8 | sys.exit(bb_mover.main()) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beancount>=2.3.5, <3.0.0 2 | pyyaml==6.0.1 3 | rich==13.6.0 4 | prompt-toolkit==3.0.39 5 | pandas==1.5.2 6 | PyYAML==6.0.1 7 | textblob==0.17.1 8 | openai>=1.14.1 9 | numpy==1.26.3 10 | imblearn==0.0 11 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | PYTHONPATH=. pytest -s 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | # Read requirements.txt 4 | with open('requirements.txt') as f: 5 | required = f.read().splitlines() 6 | 7 | setup( 8 | name='beanborg', 9 | version='0.1', 10 | author='Luciano Fiandesio', 11 | author_email='luciano@fiandes.io', 12 | url='https://github.com/luciano-fiandesio/beanborg', 13 | packages=find_packages(), 14 | install_requires=required, 15 | include_package_data=True, 16 | scripts=['bin/bb_import', 'bin/bb_mover', 'bin/bb_archive'] 17 | ) -------------------------------------------------------------------------------- /tests/files/1234.ldg: -------------------------------------------------------------------------------- 1 | 2020-02-13 * "Dummy Supermarket" "" 2 | Assets:MyBank:Savings -105.12 EUR 3 | Expenses:Groceries 4 | 5 | -------------------------------------------------------------------------------- /tests/files/My_Custom_Rule.py: -------------------------------------------------------------------------------- 1 | from beanborg.rule_engine.rules import * 2 | 3 | class My_Custom_Rule(Rule): 4 | def __init__(self, name, context): 5 | 6 | # invoking the __init__ of the parent class 7 | Rule.__init__(self, name, context) 8 | 9 | def execute(self, csv_line, tx = None, ruleDef = None ): 10 | 11 | self.checkAccountFromTo(ruleDef) 12 | 13 | if "Withdrawal".lower() in csv_line[self.context.tx_type_pos].lower(): 14 | cashPosting = [Posting( 15 | account=ruleDef.get("from"), 16 | units=None, 17 | cost=None, 18 | price=None, 19 | flag=None, 20 | meta=None), 21 | Posting( 22 | account=ruleDef.get("to"), 23 | units=None, 24 | cost=None, 25 | price=None, 26 | flag=None, 27 | meta=None)] 28 | return (True, tx._replace(postings=cashPosting)) 29 | 30 | return (False,tx) 31 | -------------------------------------------------------------------------------- /tests/files/_1234.ldg: -------------------------------------------------------------------------------- 1 | 2020-02-13 * "Dummy Supermarket changed" "" 2 | Assets:MyBank:Savings -105.12 EUR 3 | Expenses:Groceries 4 | -------------------------------------------------------------------------------- /tests/files/account.rules: -------------------------------------------------------------------------------- 1 | value;expression;result 2 | freshfood;sw;Expenses:Groceries -------------------------------------------------------------------------------- /tests/files/amount_handler.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | archive_path: archive2 7 | date_format: "%d/%m/%Y" 8 | 9 | 
indexes: 10 | date: 8 11 | counterparty: 9 12 | amount: 10 13 | account: 11 14 | currency: 12 15 | tx_type: 13 16 | -------------------------------------------------------------------------------- /tests/files/asset.rules: -------------------------------------------------------------------------------- 1 | value;expression;result 2 | ZZ03100400000608903100;eq;Assets:Bob:Savings 3 | -------------------------------------------------------------------------------- /tests/files/bank1.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: hello_rule 32 | test: 1 -------------------------------------------------------------------------------- /tests/files/bank1_custom_rule.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: My_Custom_Rule 32 | from: Assets:UK:Alice:Savings 33 | to: Assets:UK:Alice:Cash -------------------------------------------------------------------------------- /tests/files/bank1_ignore_at_pos.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Ignore_By_StringAtPos 32 | ignore_string_at_pos: 33 | - waiting;4 34 | -------------------------------------------------------------------------------- /tests/files/bank1_ignore_by_counterparty.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | 
account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Ignore_By_Payee 32 | ignore_payee: 33 | - alfa 34 | - beta 35 | -------------------------------------------------------------------------------- /tests/files/bank1_ignore_contains_string_at_pos.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Ignore_By_ContainsStringAtPos 32 | ignore_string_contains_at_pos: 33 | - waiting;4 34 | -------------------------------------------------------------------------------- /tests/files/bank1_replace_asset.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Replace_Asset 32 | -------------------------------------------------------------------------------- /tests/files/bank1_replace_counterparty.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Replace_Payee 32 | -------------------------------------------------------------------------------- /tests/files/bank1_replace_expense.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 
19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Replace_Expense 32 | -------------------------------------------------------------------------------- /tests/files/payee.rules: -------------------------------------------------------------------------------- 1 | value;expression;result 2 | ford;contains;Ford Auto -------------------------------------------------------------------------------- /tests/files/payee_with_comments.rules: -------------------------------------------------------------------------------- 1 | value;expression;result 2 | # this is a comment 3 | ford;contains;Ford Auto -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | from beanborg.rule_engine.rules_engine import RuleEngine 2 | from beanborg.rule_engine.Context import * 3 | from beanborg.rule_engine.decision_tables import * 4 | from beanborg.config import * 5 | 6 | def test_config1(): 7 | 8 | config = init_config('tests/files/bank1.yaml', False) 9 | 10 | assert config.csv.download_path == "/Users/luciano/Desktop" 11 | assert config.csv.name == "bbk_statement" 12 | assert config.csv.ref == "bbk" 13 | assert config.csv.target == 'tmp2' 14 | assert config.csv.archive == 'archive2' 15 | assert config.csv.separator == '|' 16 | assert config.csv.date_format == '%d/%m/%Y' 17 | assert config.csv.skip == 3 18 | 19 | assert config.indexes.date == 8 20 | assert config.indexes.counterparty == 9 21 | assert config.indexes.amount == 10 22 | assert config.indexes.account == 11 23 | assert config.indexes.currency == 12 24 | assert config.indexes.tx_type == 13 25 | assert config.indexes.amount_in == 14 26 | 27 | assert config.rules.bc_file == 'main1.ldg' 28 | assert config.rules.account == '1234567' 29 | assert config.rules.currency == 'GBP' 30 | assert config.rules.default_expense == "Expense:Magic" 31 | assert config.rules.force_negative == True 32 | assert config.rules.invert_negative == True 33 | 34 | assert len(config.rules.ruleset) == 1 35 | assert config.rules.ruleset[0]['name'] == 'hello_rule' 36 | assert config.rules.ruleset[0]['test'] == 1 37 | 38 | -------------------------------------------------------------------------------- /tests/test_currency_handler.py: -------------------------------------------------------------------------------- 1 | from beanborg.handlers.amount_handler import AmountHandler 2 | from beanborg.config import * 3 | from beancount.core.number import D 4 | 5 | def test_handler(): 6 | 7 | config = init_config('tests/files/amount_handler.yaml', False) 8 | 9 | handler = AmountHandler() 10 | 11 | assert D("100.00") == handler.handle("100.00", config) 12 | assert D("22000.76") == handler.handle("22 000,76", config) 13 | assert D("22000.76") == handler.handle("22.000,76", config) 14 | assert D("1022000.76") == handler.handle("1022000,76", config) 15 | assert D("-1022000.76") == handler.handle("-1,022,000.76", config) 16 | assert D("1022000.00") == handler.handle("1022000", config) 17 | assert D("22000.76") == handler.handle("22 000,76$", config) 18 | 19 | -------------------------------------------------------------------------------- /tests/test_decision_tables.py: 
-------------------------------------------------------------------------------- 1 | from beanborg.rule_engine.decision_tables import * 2 | 3 | 4 | def test_equal_value(): 5 | table = {} 6 | table["superman"] = ("equals", "batman") 7 | assert "batman" == resolve_from_decision_table(table, "superman", "mini") 8 | 9 | def test_equal_value_different_case(): 10 | table = {} 11 | table["superman"] = ("equals", "batman") 12 | assert "Batman" != resolve_from_decision_table(table, "superman", "mini") 13 | 14 | def test_equal_value_ignore_different_case(): 15 | table = {} 16 | table["rewe"] = ("equals_ic", "Expenses:Groceries") 17 | assert "Expenses:Groceries" == resolve_from_decision_table(table, "rewe", "Expenses:Unknown") 18 | 19 | def test_startsWith_value(): 20 | table = {} 21 | table["superman"] = ("startsWith", "batman") 22 | assert "batman" == resolve_from_decision_table(table, "superman_is_cool", "mini") 23 | 24 | def test_endsWith_value(): 25 | table = {} 26 | table["superman"] = ("endsWith", "batman") 27 | assert "batman" == resolve_from_decision_table(table, "hello_superman", "mini") 28 | 29 | def test_contains_value(): 30 | table = {} 31 | table["superman"] = ("contains", "batman") 32 | assert "batman" == resolve_from_decision_table( 33 | table, "hello_superman_hello", "mini" 34 | ) 35 | 36 | def test_contains_value_ignore_case(): 37 | table = {} 38 | table["rewe"] = ("contains_ic", "Expenses:Groceries") 39 | 40 | assert "Expenses:Groceries" == resolve_from_decision_table( 41 | table, "card transaction - supermarket REWE", "Expenses:Unknown" 42 | ) 43 | 44 | def test_loadfile(): 45 | table = init_decision_table("tests/files/payee_with_comments.rules") 46 | assert table["ford"] != None 47 | assert table["ford"][0] == "contains" 48 | assert table["ford"][1] == "Ford Auto" 49 | -------------------------------------------------------------------------------- /tests/test_duplicate_detector.py: -------------------------------------------------------------------------------- 1 | from beanborg.utils.duplicate_detector import * 2 | from beancount import loader 3 | 4 | def test_duplication(): 5 | 6 | # load dummy ledger file 7 | txs = init_duplication_store('1234.ldg', 'tests/files/1234.ldg' ) 8 | 9 | # load a second dummy ledger file, that contains an identical transaction 10 | entries, _, _ = loader.load_file('tests/files/_1234.ldg') 11 | for entry in entries: 12 | tup = to_tuple(entry) 13 | assert (hash_tuple(tup) in txs) 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /tests/test_rules_engine.py: -------------------------------------------------------------------------------- 1 | from beanborg.rule_engine.rules_engine import RuleEngine 2 | from beanborg.rule_engine.Context import * 3 | from beanborg.rule_engine.decision_tables import * 4 | from beanborg.config import * 5 | 6 | def test_payee_replacement(): 7 | 8 | rule_engine = make_rule_engine('tests/files/bank1_replace_counterparty.yaml') 9 | 10 | entries = "31.10.2019,b,auszahlung,electro ford,x,ZZ03100400000608903100".split(",") 11 | tx = rule_engine.execute(entries) 12 | assert tx.payee == "Ford Auto" 13 | 14 | def test_asset_replacement(): 15 | 16 | rule_engine = make_rule_engine('tests/files/bank1_replace_asset.yaml') 17 | entries = "31.10.2019,b,auszahlung,electro ford,x,ZZ03100400000608903100".split(",") 18 | tx = rule_engine.execute(entries) 19 | assert tx.postings[0].account == "Assets:Bob:Savings" 20 | 21 | def test_expense_replacement(): 22 | 23 | rule_engine = 
make_rule_engine('tests/files/bank1_replace_expense.yaml') 24 | entries = "31.10.2019,b,auszahlung,freshfood Bonn,x,ZZ03100400000608903100".split( 25 | "," 26 | ) 27 | tx = rule_engine.execute(entries) 28 | assert tx.postings[1].account == "Expenses:Groceries" 29 | 30 | def test_ignore(): 31 | 32 | rule_engine = make_rule_engine('tests/files/bank1_ignore_by_counterparty.yaml') 33 | 34 | entries = "31.10.2019,b,auszahlung,alfa,x,ZZ03100400000608903100".split(",") 35 | tx = rule_engine.execute(entries) 36 | assert tx == None 37 | 38 | entries = "31.10.2019,b,auszahlung,beta,x,ZZ03100400000608903100".split(",") 39 | tx = rule_engine.execute(entries) 40 | assert tx == None 41 | 42 | def test_ignore_at_position(): 43 | 44 | rule_engine = make_rule_engine('tests/files/bank1_ignore_at_pos.yaml') 45 | entries = "31.10.2019,b,auszahlung,alfa,waiting,ZZ03100400000608903100".split(",") 46 | tx = rule_engine.execute(entries) 47 | assert tx == None 48 | 49 | def test_ignore_by_contains_string_at_position(): 50 | rule_engine = make_rule_engine('tests/files/bank1_ignore_contains_string_at_pos.yaml') 51 | entries = "31.10.2019,b,auszahlung,alfa,this is waiting alfa,ZZ03100400000608903100".split(",") 52 | tx = rule_engine.execute(entries) 53 | assert tx == None 54 | 55 | 56 | def test_custom_rule(): 57 | 58 | rule_engine = make_rule_engine('tests/files/bank1_custom_rule.yaml') 59 | entries = "31.10.2019,b,Withdrawal,alfa,waiting,ZZ03100400000608903100".split(",") 60 | tx = rule_engine.execute(entries) 61 | 62 | assert tx.postings[0].account == "Assets:UK:Alice:Savings" 63 | assert tx.postings[1].account == "Assets:UK:Alice:Cash" 64 | 65 | def test_no_rulefile(): 66 | 67 | rule_engine = RuleEngine( 68 | Context( 69 | rules_dir=None, 70 | account=None, 71 | date_fomat="%d.%m.%Y", 72 | default_expense="Expenses:Unknown", 73 | date_pos=0, 74 | payee_pos=3, 75 | tx_type_pos=2, 76 | narration_pos=-1, 77 | account_pos=5, 78 | ruleset=None, 79 | force_account=None, 80 | debug=False 81 | ) 82 | ) 83 | 84 | entries = "31.10.2019,b,Withdrawal,alfa,waiting,ZZ03100400000608903100".split(",") 85 | tx = rule_engine.execute(entries) 86 | 87 | # no exception - the transaction is empty 88 | assert tx 89 | 90 | def make_rule_engine(config_file): 91 | config = init_config(config_file, False) 92 | 93 | return RuleEngine( 94 | Context( 95 | ruleset=config.rules.ruleset, 96 | rules_dir="tests/files", 97 | account=None, 98 | date_fomat="%d.%m.%Y", 99 | default_expense="Expenses:Unknown", 100 | date_pos=0, 101 | payee_pos=3, 102 | tx_type_pos=2, 103 | narration_pos=-1, 104 | account_pos=5, 105 | force_account=None, 106 | debug=False 107 | ) 108 | ) 109 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{36,37,38,39} 3 | 4 | # Activate isolated build environment. tox will use a virtual environment 5 | # to build a source distribution from the source tree. For build tools and 6 | # arguments use the pyproject.toml file as specified in PEP-517 and PEP-518. 7 | isolated_build = true 8 | 9 | [testenv] 10 | deps = 11 | # If your project uses README.rst, uncomment the following: 12 | # readme_renderer 13 | flake8 14 | pytest 15 | commands = 16 | # This repository uses a Markdown long_description, so the -r flag to 17 | # `setup.py check` is not needed. If your project contains a README.rst, 18 | # use `python setup.py check -m -r -s` instead. 19 | #python setup.py check -m -s 20 | # flake8 . 
21 | py.test tests {posargs} 22 | 23 | [flake8] 24 | exclude = .tox,*.egg,build,data 25 | #select = E,W,F 26 | extend-ignore = C408,B006,DUO130 27 | max-line-length = 120 -------------------------------------------------------------------------------- /tutorial/README.md: -------------------------------------------------------------------------------- 1 | # Beanborg tutorial 2 | 3 | This tutorial guides the user through the steps required to import financial transactions from a fictional bank (Eagle Bank) into an existing Beancount ledger, using [Beanborg](https://github.com/luciano-fiandesio/beanborg). 4 | 5 | ## Initial setup 6 | 7 | The tutorial is based on an existing Beancount setup, structured like [so](https://github.com/luciano-fiandesio/beanborg/tree/master/tutorial): 8 | 9 | ``` 10 | accounts.ldg 11 | main.ldg 12 | | 13 | |__ UK0000001444555.ldg 14 | ``` 15 | 16 | To get started, install beanborg using `pip`: 17 | 18 | ``` 19 | pip install git+https://github.com/luciano-fiandesio/beanborg.git 20 | ``` 21 | 22 | To get an idea of the Beanborg workflow and learn about the different configuration options, you might want to take a quick look at the project's [README](https://github.com/luciano-fiandesio/beanborg/blob/master/README.md). 23 | 24 | The goal of this tutorial is to be able to import the transactions from the [sample CSV](https://github.com/luciano-fiandesio/beanborg/blob/master/tutorial/test-data/eagle-bank-statement.csv) file into the ledger-managed `UK0000001444555.ldg` file. 25 | 26 | ## Creating a configuration file for Eagle Bank 27 | 28 | Beanborg requires a configuration file for each type of CSV file that we wish to import. 29 | Normally, each CSV file is bound to a financial institution, so it's good practice to name our config file after the bank. In this case, `eagle.yaml`. 30 | 31 | Let's create a new folder where we will store the import configuration. 32 | 33 | ``` 34 | mkdir config 35 | ``` 36 | 37 | Create a new `eagle.yaml` file in the `config` folder. 38 | 39 | Now, let's open the fictional CSV file, located in the `test-data` folder. It is important to understand the structure of the CSV file in order to configure Beanborg properly. 40 | 41 | ``` 42 | OPEN BOOK;VALUE DATE;TX TYPE;BOOKING TEXT;AMOUNT;CURRENCY;ACCOUNT;IBAN 43 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food";-21,30;EUR;0000001;UK0000001444555 44 | 04.11.2020;03.11.2020;Credit;"Best Company";1000,00;EUR;0000001;UK0000001444555 45 | 01.11.2020;01.11.2020;Direct Debit;"Doctor Bill";-540,10;EUR;0000001;UK0000001444555 46 | 01.12.2020;01.11.2020;Cash Withdrawal;Bank Of Mars;-100;EUR;0000001;UK0000001444555 47 | ``` 48 | 49 | By observing the CSV file, we can determine the following information: 50 | 51 | - we want to skip the first line 52 | - the field delimiter is the `;` character rather than the more standard comma 53 | - the currency separator is the `,` character rather than the more standard `.` 54 | - the date format uses day, month and year, separated by a dot 55 | - in order to match the ledger file `UK0000001444555.ldg` to this account we can use the `IBAN` field of the CSV file 56 | 57 | Let's start creating the configuration file for Eagle Bank. Paste the following snippet in the previously created file, `eagle.yaml`: 58 | 59 | 60 | ``` 61 | --- !Config 62 | csv: 63 | download_path: !CHANGE ME!
64 | name: eagle 65 | bank_ref: eag 66 | separator: ';' 67 | date_format: "%d.%m.%Y" 68 | currency_sep: "," 69 | ``` 70 | 71 | During a normal import operation, the CSV file is downloaded from the bank app (mobile or web) and placed in a download folder. For the sake of the tutorial, you can copy the file `eagle-bank-statement.csv` to your Downloads folder and replace `!CHANGE ME!` with the path to that folder, e.g. `/Users/tom/Downloads`. 72 | 73 | Let's look at this initial configuration. 74 | The `name` property is required to find the CSV file in the path specified by the `download_path` property. It is enough to specify the first few letters of the CSV file name, without the `csv` extension. 75 | 76 | The `bank_ref` property is very important, because it is used by Beanborg to rename the CSV file and move it to the staging area. If one has multiple bank accounts to import, it is crucial that the value of `bank_ref` is unique. 77 | 78 | The `date_format`, `separator` and `currency_sep` properties should be self-explanatory. 79 | 80 | We don't need to specify the `skip` property, since the default value is `1`. 81 | 82 | Let's try to import the CSV file into the working area, using `bb_mover`. 83 | 84 | ``` 85 | bb_mover -f config/eagle.yaml 86 | ``` 87 | 88 | If the file is found, the script should return: 89 | 90 | ``` 91 | Done :) 92 | ``` 93 | 94 | ## Add the mapping information and rules to the configuration file 95 | 96 | In order to successfully import the transactions from Eagle Bank into our ledger, we need to supply some more information to Beanborg: CSV mapping info and rules. 97 | 98 | Append the following configuration to the `eagle.yaml` file: 99 | 100 | ``` 101 | indexes: 102 | date: 1 103 | tx_type: 2 104 | counterparty: 3 105 | amount: 4 106 | currency: 5 107 | account: 7 108 | ``` 109 | 110 | Note that the `indexes` block sits at the root of the yaml file (same "level" as `csv`). 111 | 112 | 113 | With this block of configuration we are instructing Beanborg about the position of the relevant data within our CSV file. 114 | This image should hopefully make the concept clearer: 115 | 116 | ![Alt text](assets/csv.png) 117 | 118 | Beanborg is now able to map the most relevant information of the CSV file to the Beancount structure and create a valid transaction. 119 | 120 | The last section of the configuration relates to **rules**. 121 | 122 | Rules can be considered as a list of "actions" that are executed one after the other and are applied to each row of the CSV file we want to import. 123 | 124 | There are different types of rules: some can be used to change the accounts of a transaction, others to ignore a specific transaction. 125 | 126 | Let's focus on a simple rule that will assign the correct Expense account to each transaction in our CSV file. 127 | 128 | 129 | Append the following configuration to the `eagle.yaml` file: 130 | 131 | ``` 132 | rules: 133 | ruleset: 134 | - name: Replace_Expense 135 | ``` 136 | 137 | Before importing the CSV data, we need one last step: a configuration file (named `asset.rules`) that helps Beanborg associate the bank account asset definition (`Assets:Bank1:Bob:Current`, the bank account used in this tutorial) to the bank account identifier in the CSV file (in this tutorial, the IBAN number). 138 | Note that this file is required by the `Replace_Asset` rule, which is automatically executed, even if it's not specified in the rules list.
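Under the hood, these `.rules` files are simple look-up tables in the `value;expression;result` format. The snippet below is a minimal sketch of how such a table is resolved, based on the helpers exposed by `beanborg.rule_engine.decision_tables`; the file path and the fallback value passed as third argument are only illustrative assumptions:

```
from beanborg.rule_engine.decision_tables import (
    init_decision_table,
    resolve_from_decision_table,
)

# parse the "value;expression;result" rows into a look-up table
table = init_decision_table("rules/asset.rules")

# if "UK0000001444555" matches a row, the mapped result is returned,
# otherwise the fallback value (third argument) is used
print(resolve_from_decision_table(table, "UK0000001444555", "Expenses:Unknown"))
```

You normally never call these functions yourself; rules such as `Replace_Asset` and `Replace_Expense` perform the look-up for you during the import.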
139 | 140 | In Beanborg, all configuration files are placed in the `rules` folder; note that the folder name can be changed using the `rules_folder` property of the `rules` configuration. 141 | 142 | ``` 143 | mkdir rules 144 | cd rules 145 | touch asset.rules 146 | ``` 147 | 148 | Copy the following content into `asset.rules`: 149 | 150 | ``` 151 | value;expression;result 152 | UK0000001444555;equals;Assets:Bank1:Bob:Current 153 | ``` 154 | 155 | 156 | It's now time to run the second Beanborg script, `bb_import`, which imports the transactions into the ledger. 157 | 158 | ``` 159 | bb_import -f config/eagle.yaml 160 | ``` 161 | 162 | The script should exit immediately with the following error: 163 | 164 | ``` 165 | file: rules/account.rules does not exist! - The 'Replace_Expense' rules requires the account.rules file. 166 | ``` 167 | 168 | The `Replace_Expense` rule requires an additional look-up table file to map counterparty names to Expense categories. 169 | This file (named `account.rules`) should be located in the `rules` folder. 170 | 171 | 172 | Create a new `account.rules` file in the `rules` folder and paste the following data: 173 | 174 | ``` 175 | value;expression;result 176 | Fresh Food;contains;Expenses:Groceries 177 | Best Company;contains;Expenses:Clothing 178 | Doctor Bill;eq;Expenses:Medical 179 | ``` 180 | 181 | Run `bb_import -f config/eagle.yaml` again and, this time, the import should be successful. 182 | 183 | ``` 184 | summary: 185 | 186 | csv tx count: 4 187 | imported: 4 188 | tx already present: 0 189 | ignored by rule: 0 190 | error: 0 191 | ``` 192 | 193 | Each row in the CSV file is matched against the `account.rules` file: if the `counterparty` index matches the first part of the expression (e.g. `Fresh Food`), the second leg of the transaction is replaced with the proper Expenses category, in this case `Expenses:Groceries`. 194 | 195 | The `UK0000001444555.ldg` file should now contain the 4 transactions from the CSV file, and both "sides" of each transaction should be correctly set, except for one: the cash withdrawal from Bank Of Mars. We will see how to correctly categorize this transaction as well. 196 | 197 | Running the same script again (`bb_import -f config/eagle.yaml`) will trigger the automatic duplication detection mechanism: 198 | 199 | ``` 200 | summary: 201 | 202 | csv tx count: 4 203 | imported: 0 204 | tx already present: 4 205 | ignored by rule: 0 206 | error: 0 207 | ``` 208 | 209 | Note that the value of `tx already present` is `4` and `imported` is set to `0`. 210 | 211 | At this time, Beanborg does not support executing the rules without importing the data. In order to show how to import the cash withdrawal entry from our sample bank file, we need to delete and recreate the sample ledger file: 212 | 213 | ``` 214 | rm UK0000001444555.ldg 215 | touch UK0000001444555.ldg 216 | ``` 217 | 218 | Let's take a look at the cash withdrawal entry from the CSV file: 219 | 220 | ``` 221 | 01.12.2020;01.11.2020;Cash Withdrawal;Bank Of Mars;-100;EUR;0000001;UK0000001444555 222 | ``` 223 | 224 | We want to create a transaction that has the origin account set to our bank and the destination account set to `Assets:Cash:Bob`. 225 | We could create a new rule in the `account.rules` file: 226 | 227 | ``` 228 | Bank Of Mars;contains;Assets:Cash:Bob 229 | ``` 230 | 231 | but this is probably not such a good idea, because we may have multiple types of transactions from `Bank Of Mars`, for instance bank fees.
232 | Since the CSV entry clearly specifies `Cash Withdrawal` as transaction type, we can simply add a new `Set_Accounts` rule that makes use of the transaction type to assign the accounts to the transaction; add the following rule definition to the `ruleset` in `eagle.yaml`: 233 | 234 | ``` 235 | - name: Set_Accounts 236 | from: Assets:Bank1:Bob:Current 237 | to: Assets:Cash:Bob 238 | csv_index: 2 239 | csv_values: Cash Withdrawal 240 | ``` 241 | 242 | Let's re-run the import script (`bb_import -f config/eagle.yaml`): this time all four transactions should be properly categorized. 243 | The `Set_Accounts` rule uses `csv_index` to determine which index of the CSV to analyze (remember, the index count starts from `0`), while `csv_values` determines the string that should match the value at that index. If a match is found, both the `from` and `to` accounts are set on the transaction. 244 | 245 | ## Archive the CSV bank file 246 | 247 | Once the CSV file is imported, we need to archive it. Note that this step is mandatory: if you do not want to keep an archive of the CSV file, it is important to at least clean the "staging" folder of the working file (`rm tmp/*.*`, assuming the default value of the `csv.target` property is used). 248 | 249 | The archiving script simply moves the CSV file from the staging directory (`tmp`) to an `archive` directory. Additionally, it renames the CSV file by appending the dates of the first and last transactions to its name. 250 | Let's take as an example the CSV file from Eagle Bank. When the file is imported into the staging area, it gets renamed to `eag.csv`. 251 | The archive script analyzes the CSV file, extracts the dates of the first and last transactions, and renames the file to `eag_2020-11-01_2020-11-04` before moving it to the `archive` folder. 252 | Let's try: 253 | 254 | ``` 255 | bb_archive -f config/eagle.yaml 256 | ``` 257 | 258 | The output should look like: 259 | 260 | ``` 261 | ✓ detecting start and end date of transaction file... 262 | ✓ moving file to archive... 263 | ✓ removing temp folder 264 | ``` 265 | 266 | Note that `bb_archive` has also removed the staging folder `tmp`.
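To recap, once the configuration is in place, a full import session for Eagle Bank boils down to the three commands used throughout this tutorial:

```
# move the downloaded CSV file into the staging area
bb_mover -f config/eagle.yaml

# apply the rules and import the transactions into the ledger
bb_import -f config/eagle.yaml

# archive the processed CSV file and remove the staging folder
bb_archive -f config/eagle.yaml
```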
267 | 268 | 269 | 270 | -------------------------------------------------------------------------------- /tutorial/UK0000001444555.ldg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tutorial/accounts.ldg: -------------------------------------------------------------------------------- 1 | ; account opening declaration 2 | 2000-01-01 open Equity:Opening-Balances 3 | 4 | ; *** ASSETS *** 5 | ; Bob 6 | 2000-01-01 open Assets:Bank1:Bob:Current EUR 7 | 2000-01-01 open Assets:Cash:Bob EUR 8 | 9 | 10 | 11 | 12 | ; *** EXPENSES *** 13 | 2000-01-01 open Expenses:Unknown 14 | ; monthly expenses 15 | 2000-01-01 open Expenses:Groceries 16 | 2000-01-01 open Expenses:Medical 17 | 2000-01-01 open Expenses:EatingOut 18 | 2000-01-01 open Expenses:Clothing 19 | 2000-01-01 open Expenses:Utilities:Electricity 20 | 2000-01-01 open Expenses:Utilities:Gas 21 | 2000-01-01 open Expenses:Utilities:Internet 22 | 23 | ; *** INCOME *** 24 | 2000-01-01 open Income:Salary:Company1 25 | 2000-01-01 open Income:Salary:Company2 26 | 2000-01-01 open Income:Sale:Ebay 27 | 28 | 29 | ; *** LIABILITIES *** 30 | 2000-01-01 open Liabilities:Master:Alice 31 | -------------------------------------------------------------------------------- /tutorial/assets/csv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/tutorial/assets/csv.png -------------------------------------------------------------------------------- /tutorial/main.ldg: -------------------------------------------------------------------------------- 1 | option "title" "Shared Ledger" 2 | option "operating_currency" "EUR" 3 | 4 | include "accounts.ldg" 5 | ;include "budget.ldg" 6 | 7 | include "UK0000001444555.ldg" 8 | ;include "BANK200000.ldg" 9 | -------------------------------------------------------------------------------- /tutorial/test-data/eagle-bank-statement.csv: -------------------------------------------------------------------------------- 1 | OPEN BOOK;VALUE DATE;TX TYPE;BOOKING TEXT;AMOUNT;CURRENCY;ACCOUNT;IBAN 2 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food";-21,30;EUR;0000001;UK0000001444555 3 | 04.11.2020;03.11.2020;Credit;"Best Company";1000,00;EUR;0000001;UK0000001444555 4 | 01.11.2020;01.11.2020;Direct Debit;"Doctor Bill";-540,10;EUR;0000001;UK0000001444555 5 | 01.12.2020;01.11.2020;Cash Withdrawal;Bank Of Mars;-100;EUR;0000001;UK0000001444555 6 | --------------------------------------------------------------------------------