├── .envrc ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── beanborg ├── __init__.py ├── arg_parser.py ├── bb_archive.py ├── bb_import.py ├── bb_mover.py ├── classification │ ├── __init__.py │ ├── classifier.py │ ├── custom_fuzzy_wordf_completer.py │ ├── data_loader.py │ ├── gpt_service.py │ ├── transaction_model.py │ └── ui_service.py ├── config.py ├── handlers │ ├── __init__.py │ └── amount_handler.py ├── importer.py ├── model │ ├── __init__.py │ └── transactions.py ├── rule_engine │ ├── Context.py │ ├── __init__.py │ ├── decision_tables.py │ ├── rules.py │ └── rules_engine.py └── utils │ ├── __init__.py │ ├── duplicate_detector.py │ ├── hash_utils.py │ ├── journal_utils.py │ └── string_utils.py ├── bin ├── bb_archive ├── bb_import └── bb_mover ├── requirements.txt ├── run_tests.sh ├── setup.py ├── tests ├── files │ ├── 1234.ldg │ ├── My_Custom_Rule.py │ ├── _1234.ldg │ ├── account.rules │ ├── amount_handler.yaml │ ├── asset.rules │ ├── bank1.yaml │ ├── bank1_custom_rule.yaml │ ├── bank1_ignore_at_pos.yaml │ ├── bank1_ignore_by_counterparty.yaml │ ├── bank1_ignore_contains_string_at_pos.yaml │ ├── bank1_replace_asset.yaml │ ├── bank1_replace_counterparty.yaml │ ├── bank1_replace_expense.yaml │ ├── payee.rules │ └── payee_with_comments.rules ├── test_config.py ├── test_currency_handler.py ├── test_decision_tables.py ├── test_duplicate_detector.py └── test_rules_engine.py ├── tox.ini └── tutorial ├── README.md ├── UK0000001444555.ldg ├── accounts.ldg ├── assets └── csv.png ├── main.ldg └── test-data └── eagle-bank-statement.csv /.envrc: -------------------------------------------------------------------------------- 1 | layout python 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | .direnv 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | eag.yaml 141 | tmp 142 | deploy_local.sh 143 | rules 144 | todo.txt 145 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
-------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Beanborg 2 | 3 | Beanborg automatically imports financial transactions from external CSV files into the [Beancount](https://github.com/beancount/beancount) bookkeeping system. It is designed to streamline transaction importing by matching data to the correct expense accounts and doing so quickly, even with multiple files. 4 | 5 | ## Requirements 6 | 7 | - Python 3 8 | - Beancount v2 9 | 10 | ## Goals and key features 11 | 12 | Beanborg has two main design goals: 13 | 14 | - automatic matching of transaction data with the correct Expense accounts 15 | - speed, capable of processing multiple financial CSV files in seconds. 16 | 17 | Example: 18 | 19 | Given the following transaction from a CSV file: 20 | 21 | ``` 22 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food Inc.";-21,30;EUR;0000001;UK0000001444555 23 | ``` 24 | 25 | Beanborg imports the transaction into Beancount and assigns the Account "Expense:Grocery" to the transaction: 26 | 27 | ``` 28 | 2020-11-04 * "Fresh Food Inc." "" 29 | csv: "04.11.2020,04.11.2020,Direct Debit,Fresh Food,-21,30,EUR,0000001,UK0000001444555" 30 | md5: "60a54f6ed13ae7b7e70fd475eb677511" 31 | Assets:Bank1:Bob:Current -21.30 EUR 32 | Expenses:Grocery 33 | ``` 34 | 35 | ## Additional features: 36 | 37 | - Extendable rule-based system for transaction categorization. 38 | - Duplicate transaction detection. 39 | - Transaction classification using machine learning (ML) and large language models (LLM) (optional). 40 | - Highly configurable with extensive rules. 41 | - Smart archiving: files are renamed with start and end dates after processing. 42 | 43 | 44 | ## Installation 45 | 46 | To install beanborg, use: 47 | 48 | ``` 49 | pip install git+https://github.com/luciano-fiandesio/beanborg.git 50 | ``` 51 | 52 | Fora specific branch: 53 | 54 | ``` 55 | pip install git+https://github.com/luciano-fiandesio/beanborg.git@BRANCH_NAME 56 | ``` 57 | 58 | ## Workflow 59 | 60 | Beanborg is based on a three-stage workflow: 61 | 62 | 1. Move the CSV file to the staging area. 63 | 2. Import the CSV into Beancount, categorizing transactions. 64 | 3. Archive the CSV after processing. 65 | 66 | ### Stage 1: Move Bank CSV File 67 | 68 | Move a bank CSV file to the staging area: 69 | 70 | ``` 71 | bb_mover -f ~/config/wells-fargo.yaml 72 | ``` 73 | 74 | ### Stage 2: Import the CSV into Beancount 75 | 76 | Import the CSV into Beancount, categorizing transactions: 77 | 78 | ``` 79 | bb_import -f ~/config/wells-fargo.yaml 80 | ``` 81 | 82 | ### Stage 3: Archive the CSV File 83 | Move the CSV file to the archive folder: 84 | 85 | ``` 86 | bb_archive -f ~/config/wells-fargo.yaml 87 | ``` 88 | 89 | ## Configuration 90 | 91 | Each financial institution requires a dedicated YAML configuration file that defines the structure of the CSV file and the rules applied during import. 
92 | 93 | ### Sample configuration file 94 | 95 | ``` 96 | --- !Config 97 | csv: 98 | download_path: "/home/mike/downloads" 99 | name: wells-fargo 100 | bank_ref: wfa 101 | date_format: "%d/%m/%Y" 102 | skip: 1 103 | 104 | indexes: 105 | date: 1 106 | amount: 2 107 | counterparty: 6 108 | 109 | rules: 110 | beancount_file: 'main-ledger.ldg' 111 | rules_file: wells-fargo.rules 112 | account: 565444499 113 | currency: USD 114 | ruleset: 115 | - Replace_Asset 116 | - Replace_Expense 117 | ``` 118 | 119 | ### Structure of a configuration file 120 | 121 | A Beanborg configuration must start with the `--- !Config` tag and has 3 main sections: 122 | 123 | #### csv 124 | 125 | The `csv` section of the configuration file determines the options related to the structure and location of the CSV file to import. 126 | Here is the list of options for the `csv` section: 127 | 128 | | Property | Description | Default | Example | 129 | |--------------------|-------------|---------|---------| 130 | | `download_path` | Full path to the folder to which the CSV is downloaded at the beginning of the import process. This option is only required by the `bb_mover` script. | | "/home/john/download" | 131 | | `name` | The name of the CSV file at the time of download. Note that the name can be partial. For instance, if the CSV file is named "bank1-statement-03-2020", the `name` can be simply set to `bank1`. This option is only required by the `bb_mover` script. | | `bank1` | 132 | | `ref` | Once the CSV file is imported into the staging area, it gets renamed using the value of `ref`. It is recommended to use a short string to identify the financial institution. This option is used by all the scripts. | | `com` | 133 | | `separator` | The field delimiter used in the financial institution's CSV file. | , | | 134 | | `currency_sep` | The decimal separator used in the CSV file | . | | 135 | | `date_format` | Date format used in the CSV file. The format is based on strftime directives: https://strftime.org/. Note that the value must be in quotes | | "%d/%m/%Y" | 136 | | `skip` | Number of lines of the CSV file to skip during import | 1 | | 137 | | `target` | The folder name or path to which the CSV file is moved during the first stage. | tmp | | 138 | | `archive` | The folder name or path to which the CSV file is archived during the archive stage | archive | | 139 | | `post_move_script` | Path to a post-move script that is executed after the CSV file is moved into the work folder. The script must use a `shebang` (e.g. `#!/bin/bash`) in order to be executed. | | `/home/tom/scripts/convert.sh` | 140 | | `keep_original` | Keep the CSV file from the `download_path`. The default is to delete it after the move process. This option is only required by the `bb_mover` script. | `False` | `True` | 141 | 142 | #### indexes 143 | 144 | The `indexes` section of the configuration file allows mapping each CSV "column" (or index) to the information required to parse and import the data. In other words, each option is used by Beanborg to determine where the `date` or `amount` of each transaction is located in the CSV file. 145 | 146 | Note that the first index starts from `0`.
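For example, for the sample transaction shown earlier in this README (`04.11.2020;04.11.2020;Direct Debit;"Fresh Food Inc.";-21,30;EUR;0000001;UK0000001444555`), a minimal `indexes` mapping might look like the sketch below. The column positions are inferred from that sample row and are purely illustrative; adjust them to your bank's CSV layout:

```yaml
indexes:
  date: 0          # 04.11.2020
  tx_type: 2       # Direct Debit
  counterparty: 3  # Fresh Food Inc.
  amount: 4        # -21,30
  currency: 5      # EUR
  account: 7       # UK0000001444555
```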
147 | 148 | | Property | Description | Default | 149 | |----------------|-------------|---------| 150 | | `date` | The index corresponding to the date of the transaction. | 0 | 151 | | `counterparty` | The index corresponding to the name of the counterparty of the transaction. | 3 | 152 | | `amount` | The index corresponding to the amount of the transaction (debit or credit). | 4 | 153 | | `account` | The index corresponding to the account of the transaction (e.g. the IBAN or ABA code). | 4 | 154 | | `currency` | The index corresponding to the currency of the transaction. | 5 | 155 | | `tx_type` | The index corresponding to the transaction type. | 2 | 156 | | `amount_in` | Some financial institutions use separate indexes for debit and credit. In this case, it is possible to specify the index corresponding to the credited amount. | | 157 | | `narration` | The index corresponding to the narration or reference field of the transaction. | | 158 | 159 | #### rules 160 | 161 | | Property | Description | Default | 162 | |--------------------------------|-------------|--------------------| 163 | | `beancount_file` | The master Beancount ledger file. This property is mandatory and is required by the duplicate detection mechanism. | `main.ldg` | 164 | | `rules_folder` | The folder name in which custom rules and look-up table files are stored | `rules` | 165 | | `account` | This property is normally used when a CSV file doesn't contain any account property (IBAN, ABA, account number, etc.). | | 166 | | `currency` | Force a default currency | | 167 | | `default_expense` | Default expense account | `Expenses:Unknown` | 168 | | `force_negative` | TODO | False | 169 | | `invert_negative` | TODO | False | 170 | | `origin_account` | Specifies the origin account of each transaction | | 171 | | `ruleset` | List of rules to apply to the CSV file. See the `Rules` section. | | 172 | | `advanced_duplicate_detection` | Enable the advanced duplicate detection rule (see Advanced Duplicate Detection section) | `true` | 173 | 174 | ## Rules 175 | 176 | Beanborg’s rules engine is highly customizable, allowing users to automate the categorization of transactions based on pre-existing rules. 177 | Each rule is referenced by name and can be used for tasks such as assigning accounts, ignoring transactions, or modifying transaction details like the counterparty's name. 178 | 179 | Some rules rely on **lookup tables**, which are semicolon-separated CSV files. These files contain three columns: `value`, `expression`, and `result`, allowing flexible criteria for matching and transforming data. 180 | 181 | - **value**: The string that the rule searches for. 182 | - **expression**: The matching criteria used by the rule, such as `equals`, `equals_ic`, `startsWith`, `endsWith`, `contains`, or `contains_ic`. 183 | - `equals_ic` and `contains_ic` are case-insensitive versions of `equals` and `contains`. 184 | - **result**: The output of the rule when a match is found.
185 | 186 | ### Example: Expense Categorization Rule 187 | 188 | For instance, if you want to categorize any transaction where the payee is "Walmart" under `Expenses:Groceries`, the lookup entry would be: 189 | 190 | `Walmart;equals;Expenses:Groceries` 191 | 192 | 193 | To ensure that any variation of "Walmart," regardless of case, is also matched, you can use: 194 | 195 | `Walmart;contains_ic;Expenses:Groceries` 196 | 197 | The `_ic` indicates `ignore case`. 198 | 199 | The following sections provide a detailed explanation of the rules available in Beanborg. 200 | 201 | #### Replace_Payee 202 | 203 | The `Replace_Payee` rule is used to modify the name of a transaction’s counterparty. This is useful when you want to standardize or adjust the names in your financial records. 204 | 205 | This rule requires a lookup file named `payee.rules`, which should be placed in the directory defined by the `rules.rules_folder` option in the configuration file. 206 | 207 | Suppose you want to modify a transaction where the counterparty is listed as "Fresh Food Inc." and replace it with "FRESH FOOD" when importing the data into the ledger. 208 | 209 | Given the following CSV transaction: 210 | 211 | ``` 212 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food Inc.";-21,30;EUR;0000001;UK0000001444555 213 | ``` 214 | 215 | You would follow these steps: 216 | 217 | 1. Add the `Replace_Payee` rule to the list of rules in the configuration file for the relevant financial institution. 218 | 2. In the `payee.rules` lookup file, add the following entry: 219 | 220 | ``` 221 | Fresh Food Inc.;equals;FRESH FOOD 222 | ``` 223 | 224 | This will ensure that the counterparty "Fresh Food Inc." is replaced with "FRESH FOOD" in your Beancount ledger. 225 | 226 | 227 | #### Replace_Expense 228 | 229 | The `Replace_Expense` rule is used to assign an account to a transaction based on the value of the `counterparty` index from the CSV file. This rule is particularly helpful for categorizing transactions into the appropriate expense accounts. 230 | 231 | This rule requires a lookup file named `account.rules`, which should be located in the directory defined by the `rules.rules_folder` option in the configuration file. 232 | 233 | Suppose you want to categorize a transaction where the counterparty is "Fresh Food Inc." under the account `Expenses:Grocery` when importing the data into Beancount. 234 | 235 | Given the following CSV transaction: 236 | 237 | ``` 238 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food Inc.";-21,30;EUR;0000001;UK0000001444555 239 | ``` 240 | 241 | You would follow these steps: 242 | 243 | 1. Add the `Replace_Expense` rule to the list of rules in the configuration file for the relevant financial institution. 244 | 2. In the `account.rules` lookup file, add the following entry: 245 | 246 | ``` 247 | Fresh Food Inc.;equals;Expenses:Groceries 248 | ``` 249 | 250 | This will ensure that any transaction with "Fresh Food Inc." as the counterparty will be assigned to the `Expenses:Grocery` account in your Beancount ledger. 251 | 252 | 253 | #### Replace_Asset 254 | 255 | 256 | The `Replace_Asset` rule assigns an "origin" account to a transaction based on the value of the `account` index in a CSV file. 257 | This rule is useful for ensuring that transactions are recorded with the correct source account in Beancount. 258 | 259 | The `Replace_Asset` rule is automatically added to the ruleset, even if it is not explicitly declared in the configuration file. 
260 | 261 | ##### Origin Account Resolution 262 | 263 | The rule can resolve the origin account in two ways: 264 | 265 | 1. Using a lookup file named `asset.rules`, located in the directory defined by the `rules.rules_folder` option in the config file. 266 | 2. Using the `rules.origin_account` property specified directly in the configuration file. 267 | 268 | Suppose you want to import the following CSV transaction and assign the origin account as `Assets:Jim:Current`: 269 | 270 | ``` 271 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food Inc.";-21,30;EUR;0000001;UK0000001444555 272 | ``` 273 | 274 | ##### Steps: 275 | 276 | 1. Create an `asset.rules` lookup file and add the following entry: 277 | 278 | ``` 279 | value;expression;result 280 | UK0000001444555;equals;Assets:Jim:Current 281 | ``` 282 | 283 | This entry will match the `account` index value (`UK0000001444555`) and assign the origin account as `Assets:Jim:Current` in your Beancount ledger. 284 | If no match is found, the rule will default to `Assets:Unknown`. 285 | 286 | #### Handling Missing `account` Index 287 | 288 | If the CSV file does not contain an `account` index, you can specify the account directly in the configuration file by using the `account` property: 289 | 290 | ```yaml 291 | --- !Config 292 | ... 293 | rules: 294 | account: UK0000001444555 295 | ``` 296 | 297 | This will assign the account `Assets:Jim:Current` to all transactions in the CSV file, regardless of the actual account value in the CSV. 298 | 299 | Alternatively, you can set the `origin_account` property in the `rules` block and skip this rule completely. 300 | 301 | ```yaml 302 | --- !Config 303 | ... 304 | rules: 305 | origin_account: Assets:Jim:Current 306 | ``` 307 |
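Whichever way the origin account is resolved (via `asset.rules` or via `origin_account`), the imported entry ends up looking roughly like the following sketch; the `csv` and `md5` metadata lines are omitted here, and the second posting falls back to the configured `default_expense` account when no expense rule matches:

```
2020-11-04 * "Fresh Food Inc." ""
  Assets:Jim:Current -21.30 EUR
  Expenses:Unknown
```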
350 | 351 | #### Set_Accounts 352 | 353 | The `Set_Accounts` sets both the **origin** and **destination** account for a given transaction, based on one or more values of a given CSV index. 354 | This rule is useful for transactions like ATM withdrawals, where both accounts need to be defined. 355 | 356 | As an example, consider the following CSV transaction representing an ATM withdrawal: 357 | 358 | ``` 359 | 01.12.2020;01.11.2020;Cash Withdrawal;Bank Of Holland;-100;EUR;0000001;UK0000001444555 360 | ``` 361 | 362 | In this case, we want to set the **origin** account to `Assets:Jim:Current` and the **destination** account to `Assets:Jim:Cash`. 363 | 364 | The `Set_Accounts` rule can be configured as follows: 365 | 366 | ``` 367 | - name: Set_Accounts 368 | from: Assets:Jim:Current 369 | to: Assets:Jim:Cash 370 | csv_index: 2 371 | csv_values: Cash Withdrawal 372 | ``` 373 | 374 | 375 | - The rule points to `csv_index: 2`, which refers to the third column in the CSV (indexing starts from 0). 376 | - If the value at index 2 matches `Cash Withdrawal`, the origin account is set to `Assets:Jim:Current` and the destination account is set to `Assets:Jim:Cash`. 377 | 378 | The `Set_Accounts` rule supports multiple `csv_values` separated by a semicolon (`;`). 379 | If any of the specified values match, the rule is applied. 380 | For example, if you want the rule to apply to different forms of "withdrawal" in multiple languages: 381 | 382 | 383 | ``` 384 | - name: Set_Accounts 385 | from: Assets:Jim:Current 386 | to: Assets:Jim:Cash 387 | csv_index: 2 388 | csv_values: Cash Withdrawal;*Retiro*;*Ritiro* 389 | ``` 390 | 391 | - The `csv_values` are case-insensitive. 392 | - Wildcards are supported using `fnmatch`. In the example above, 393 | the wildcard * is used to match any string that contains `Retiro` or `Ritiro`. 394 | 395 | #### Ignore_By_Payee 396 | 397 | The `Ignore_By_Payee` rule can be used to ignore transactions based on the value of the `counterparty` index in a CSV file. 398 | This is useful when you want to exclude specific transactions from being imported into the ledger. 399 | 400 | 401 | Suppose you want to ignore any transactions where the counterparty is "Mc Donald" or "Best Shoes". You can configure the rule as follows: 402 | 403 | ``` 404 | - name: Ignore_By_Payee 405 | ignore_payee: 406 | - Mc Donald 407 | - Best Shoes 408 | ``` 409 | 410 | The names of counterparties in the `ignore_payee` list are case-insensitive. This means both "Mc Donald" and "mc donald" would be matched and ignored. 411 | 412 | 413 | #### Ignore_By_StringAtPos 414 | 415 | The `Ignore_By_StringAtPos` rule allows you to ignore a transaction based on the value found at a specific index in the CSV file. This is useful for filtering out transactions that meet specific criteria in a particular column. 416 | 417 | ### Example 418 | 419 | To ignore transactions where the value in index 4 (fifth column) matches `abc0102`, configure the rule like this: 420 | 421 | ```yaml 422 | - name: Ignore_By_StringAtPos 423 | ignore_string_at_pos: 424 | - abc0102;4 425 | ``` 426 | - The index in the CSV file starts from `0`, so `4` refers to the fifth column. 427 | - The values specified in `ignore_string_at_pos` are case-insensitive, meaning `abc0102` and `ABC0102` would both be matched and ignored.
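To see how the individual rules fit together, here is a sketch of a `rules` block that combines several of the rules described above. It assumes that plain rule names and parameterized entries (`- name: ...`) can be mixed in the same `ruleset` list; all account names, CSV positions, and counterparty strings below are illustrative:

```yaml
rules:
  beancount_file: 'main.ldg'
  ruleset:
    - Replace_Payee
    - Replace_Expense
    - name: Set_Accounts
      from: Assets:Jim:Current
      to: Assets:Jim:Cash
      csv_index: 2
      csv_values: Cash Withdrawal
    - name: Ignore_By_Payee
      ignore_payee:
        - Best Shoes
    - name: Ignore_By_StringAtPos
      ignore_string_at_pos:
        - abc0102;4
```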
428 | 429 | ### Custom rules 430 | 431 | TODO 432 | 433 | ### Advanced Duplicate Detection 434 | 438 | Beanborg includes a robust duplicate detection mechanism to prevent importing the same transaction multiple times. This method works by hashing the transaction data from the CSV file and associating the resulting hash with the ledger entry using [transaction metadata](https://beancount.github.io/docs/beancount_language_syntax.html#metadata). 439 | 440 | 441 | #### Basic Duplicate Detection 442 | 443 | When a transaction is imported, Beanborg generates a hash of the CSV data. For example, consider the following CSV entry: 444 | 445 | ``` 446 | 2019-03-17,2019-03-18,Überweisung,nick sammy,-520,00,IT389328932723787832,Personal,E-d3be986080315683eee5efbeb297243a,Gebucht,Privat 447 | ``` 448 | 449 | 450 | The corresponding hash (`2454abe7257b2b40dfa9e5d24b6e16e7`) is stored in the ledger's metadata under the `md5` key. 451 | If you attempt to import the same CSV row again, Beanborg detects that the hash already exists and rejects the transaction, preventing duplicates. 452 | 453 | #### Handling Inconsistent Data 454 | 455 | In practice, banks may modify transaction details in the CSV file after the first export. For example, consider the following modified entry: 456 | 457 | ``` 458 | 2019-03-17,2019-03-18,Überweisung,Nick Sammy,-520,00,IT389328932723787832,Personal,E-d3be986080315683eee5efbeb297243a,Gebucht,Privat 459 | ``` 460 | 461 | In this case, the payee’s name has changed from `nick sammy` to `Nick Sammy`. Since this small variation alters the transaction's hash, Beanborg would treat it as a different entry, bypassing the basic duplicate detection mechanism. 462 | 463 | To address these inconsistencies, Beanborg implements a secondary, advanced duplicate detection system. In addition to hashing the transaction, it checks if a transaction with the **same date and amount** already exists in the ledger for the current account. If a potential duplicate is found, Beanborg prompts the user to confirm whether the transaction should be imported. 464 | 465 | The advanced duplicate detection can be disabled by setting the `advanced_duplicate_detection` option to `false` in the account’s configuration file, allowing Beanborg to rely solely on hash-based detection. 466 | 467 | ```yaml 468 | rules: 469 | advanced_duplicate_detection: false 470 | ``` 471 | 472 | ### Machine Learning-Based Transaction Categorization 473 | 474 | Beanborg integrates an advanced Machine Learning (ML) mechanism to automatically categorize transactions when rule-based categorization is not possible. This system ensures that transactions are accurately classified by leveraging both machine learning and, optionally, the ChatGPT API. 475 | 476 | 477 | #### How It Works 478 | 479 | When Beanborg is unable to categorize a transaction through its predefined rules, it invokes an ML model trained on historical data to predict the most likely categories. This provides an additional layer of automation to reduce the need for manual intervention. 480 | 481 | - **Top Predictions**: The system generates up to three category predictions using the ML model.
These predictions are displayed to the user, who can select one of the suggested categories or manually assign a category if none of the suggestions are appropriate. 482 | 483 | - **Optional GPT Integration**: If enabled, a fourth prediction is provided by querying the ChatGPT API, offering an AI-based suggestion that complements the ML model's predictions. 484 | 485 | #### Prediction Workflow 486 | 487 | The categorization workflow follows a structured process: 488 | 489 | 1. **Transaction Evaluation**: If no rule matches a transaction, Beanborg invokes the ML model to generate category predictions. 490 | 2. **Top 3 ML Predictions**: The system displays the three most likely categories for the transaction based on the training dataset and the features extracted. 491 | 3. **User Interaction**: The user can choose one of the three ML-generated categories or manually assign a category if the predictions are not suitable. 492 | 4. **Optional GPT Suggestion**: If enabled, a fourth prediction generated by the ChatGPT API is displayed, offering an alternative suggestion. 493 | 5. **Dynamic Learning**: The system updates the training dataset based on the user's final choice, enabling continuous model improvement. 494 | 495 | #### Enabling the ChatGPT API predictions 496 | 497 | To enable the optional ChatGPT API-based prediction, follow these steps: 498 | 499 | 1. Set the `OPENAI_API_KEY` environment variable with your OpenAI API key. 500 | 2. Update the configuration file to activate the feature by setting the `rules.use_llm` property to `true`. 501 | 502 | Your configuration should look like this: 503 | 504 | ```yaml 505 | rules: 506 | use_llm: true 507 | ``` 508 | 509 | With these settings enabled, Beanborg will include an additional category prediction generated by the ChatGPT API alongside the machine learning model’s top predictions. 
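For example, assuming a POSIX shell and a placeholder key value, the variable can be exported before running the import stage:

```
export OPENAI_API_KEY="sk-..."
bb_import -f ~/config/wells-fargo.yaml
```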
510 | 511 | -------------------------------------------------------------------------------- /beanborg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/__init__.py -------------------------------------------------------------------------------- /beanborg/arg_parser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def eval_args(help_message): 5 | 6 | parser = argparse.ArgumentParser(description=help_message) 7 | 8 | parser.add_argument( 9 | "-f", 10 | "--file", 11 | help="Configuration file to load", 12 | required=True, 13 | ) 14 | 15 | parser.add_argument( 16 | "-v", "--debug", required=False, default=False, action="store_true" 17 | ) 18 | 19 | parser.add_argument( 20 | "--fix-only", 21 | required=False, 22 | default=False, 23 | action="store_true", 24 | help="Only fix transactions without an account", 25 | ) 26 | 27 | args = parser.parse_args() 28 | return args 29 | -------------------------------------------------------------------------------- /beanborg/bb_archive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | __copyright__ = "Copyright (C) 2023 Luciano Fiandesio" 5 | __license__ = "GNU GPLv2" 6 | 7 | import csv 8 | import os 9 | import shutil 10 | import sys 11 | from datetime import datetime 12 | 13 | from rich import print as rprint 14 | 15 | from beanborg.arg_parser import eval_args 16 | from beanborg.config import init_config 17 | 18 | 19 | def main(): 20 | 21 | args = eval_args("Archives imported CVS file") 22 | config = init_config(args.file, args.debug) 23 | 24 | target_csv = os.path.join(config.csv.target, config.csv.ref + ".csv") 25 | 26 | if not os.path.isfile(target_csv): 27 | rprint(f"[red]file: {target_csv} does not exist![red]") 28 | sys.exit(-1) 29 | 30 | if not os.path.isdir(config.csv.archive): 31 | os.mkdir(config.csv.archive) 32 | 33 | dates = [] 34 | print("\u2713" + " detecting start and end date of transaction file...") 35 | with open(target_csv) as csv_file: 36 | csv_reader = csv.reader(csv_file, delimiter=config.csv.separator) 37 | for _ in range(config.csv.skip): 38 | next(csv_reader) # skip the line 39 | 40 | for row in csv_reader: 41 | try: 42 | dates.append( 43 | datetime.strptime( 44 | row[config.indexes.date].strip(), config.csv.date_format 45 | ) 46 | ) 47 | except Exception as ex: 48 | print("error: " + str(ex)) 49 | 50 | print("\u2713" + " moving file to archive...") 51 | os.rename( 52 | target_csv, 53 | config.csv.archive 54 | + "/" 55 | + config.csv.ref 56 | + "_" 57 | + str(min(dates).date()) 58 | + "_" 59 | + str(max(dates).date()) 60 | + ".csv", 61 | ) 62 | 63 | print("\u2713" + " removing temp folder") 64 | shutil.rmtree(config.csv.target) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /beanborg/bb_import.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | __copyright__ = "Copyright (C) 2024 Luciano Fiandesio" 5 | __license__ = "GNU GPLv2" 6 | 7 | 8 | from beanborg.importer import Importer 9 | 10 | 11 | def main(): 12 | imp = Importer() 13 | imp.import_transactions() 14 | 15 | 16 | if __name__ == "__main__": 17 | main() 18 | 
-------------------------------------------------------------------------------- /beanborg/bb_mover.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | __copyright__ = "Copyright (C) 2023 Luciano Fiandesio" 5 | __license__ = "GNU GPLv2" 6 | 7 | import glob 8 | import os 9 | import shutil 10 | import sys 11 | from subprocess import CalledProcessError, check_call 12 | 13 | from rich import print as rprint 14 | 15 | from beanborg.arg_parser import eval_args 16 | from beanborg.config import init_config 17 | 18 | 19 | def main(): 20 | 21 | args = eval_args("Move bank csv file to processing folder") 22 | config = init_config(args.file, args.debug) 23 | current_dir = os.getcwd() 24 | # support path like ~/Downloads 25 | path = os.path.expanduser(config.csv.download_path) 26 | if not os.path.isdir(path): 27 | rprint(f"[red]folder: {config.csv.download_path} does not exist![/red]") 28 | sys.exit(-1) 29 | 30 | if not os.path.isdir(config.csv.target): 31 | os.mkdir(config.csv.target) 32 | 33 | # count number of files starting with: 34 | file_count = len(glob.glob1(path, config.csv.name + "*")) 35 | 36 | if file_count > 1: 37 | print( 38 | "more than one file starting with % s found in %s. \ 39 | Can not continue ." 40 | % (config.csv.name, config.csv.download_path) 41 | ) 42 | sys.exit(-1) 43 | 44 | if file_count == 0: 45 | rprint( 46 | f"[red]No file found in [bold]{config.csv.download_path}[/bold] " 47 | f"with name starting with: [bold]{config.csv.name}[/bold][/red]" 48 | ) 49 | sys.exit(-1) 50 | 51 | if config.csv.post_script_path and not os.path.isfile(config.csv.post_script_path): 52 | print("No post-move script found: %s" % (config.csv.post_script_path)) 53 | sys.exit(-1) 54 | 55 | for f in os.listdir(path): 56 | if f.startswith(config.csv.name): 57 | src = os.path.join(path, f) 58 | moved_csv = os.path.join(config.csv.target, config.csv.ref + ".csv") 59 | if config.csv.keep_original: 60 | shutil.copy(src, moved_csv) 61 | else: 62 | os.rename(src, moved_csv) 63 | 64 | if config.csv.post_script_path: 65 | try: 66 | check_call( 67 | [ 68 | config.csv.post_script_path, 69 | os.path.join(current_dir, moved_csv), 70 | ] 71 | ) 72 | except CalledProcessError as e: 73 | rprint( 74 | "[red]An error occurred executing: %s\n%s[/red]" 75 | % (config.csv.post_script_path, str(e)) 76 | ) 77 | print("Done :) ") 78 | 79 | 80 | if __name__ == "__main__": 81 | main() 82 | -------------------------------------------------------------------------------- /beanborg/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/classification/__init__.py -------------------------------------------------------------------------------- /beanborg/classification/classifier.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pandas as pd 4 | from beancount.core.data import Posting 5 | from prompt_toolkit import prompt 6 | from prompt_toolkit.key_binding import KeyBindings 7 | from prompt_toolkit.keys import Keys 8 | from rich import print 9 | from rich.prompt import Confirm 10 | 11 | from beanborg.classification.custom_fuzzy_wordf_completer import ( 12 | CustomFuzzyWordCompleter, 13 | ) 14 | from beanborg.classification.data_loader import DataLoader 15 | from beanborg.classification.gpt_service 
import GPTService 16 | from beanborg.classification.transaction_model import TransactionModel 17 | from beanborg.classification.ui_service import UIService 18 | from beanborg.utils.journal_utils import JournalUtils 19 | from beanborg.utils.string_utils import StringUtils 20 | 21 | 22 | class Classifier: 23 | 24 | def __init__(self, data="training_data.csv", use_llm=False, bc_file=None): 25 | self.trainingDataFile = data 26 | self.use_llm = use_llm 27 | self.bc_file = bc_file 28 | self.training_data = DataLoader.load_data(self.trainingDataFile) 29 | try: 30 | self.model = TransactionModel(self.training_data, data) 31 | except Exception as e: 32 | print(f"Error initializing TransactionModel: {e}") 33 | self.model = None 34 | 35 | self.gpt_service = GPTService(self.use_llm) 36 | self.ui_service = UIService() 37 | 38 | def has_no_category(self, tx, args) -> bool: 39 | return tx.postings[1].account == args.rules.default_expense 40 | 41 | def get_day_of_month(self, date): 42 | return pd.to_datetime(date).day 43 | 44 | def get_day_of_week(self, date): 45 | return pd.to_datetime(date).dayofweek 46 | 47 | def get_predictions(self, text, day_of_month, day_of_week): 48 | 49 | if self.model is None: 50 | return [], [], self.get_llm_prediction(text) 51 | 52 | # Use the TransactionModel for predictions 53 | top_labels, top_probs = self.model.predict(text, day_of_month, day_of_week) 54 | 55 | alternative_label = self.get_llm_prediction(text) 56 | 57 | return top_labels, top_probs, alternative_label 58 | 59 | def confirm_classification(self, txs, args): 60 | return Confirm.ask( 61 | f"\n[red]You have [bold]{txs.count_no_category(args.rules.default_expense)}[/bold] " 62 | f"transactions without category, do you want to fix them now?[/red]" 63 | ) 64 | 65 | def get_llm_prediction(self, text): 66 | 67 | if self.use_llm: 68 | # This function queries the GPT service for a label prediction based on the provided text. 69 | # It uses the available accounts from the journal to help the GPT service make a more informed prediction. 70 | # If the GPT service is not available, it returns None. 
71 | accounts = JournalUtils().get_accounts(self.bc_file) 72 | alternative_label = self.gpt_service.query_gpt_for_label(text, accounts) 73 | else: 74 | alternative_label = None 75 | 76 | return alternative_label 77 | 78 | def process_transaction(self, tx, index, txs, args): 79 | stripped_text = StringUtils.strip_digits(tx.payee.upper()) 80 | day_of_month = self.get_day_of_month(tx.date) 81 | day_of_week = self.get_day_of_week(tx.date) 82 | 83 | top_labels, top_probs, chatgpt_prediction = self.get_predictions( 84 | stripped_text, day_of_month, day_of_week 85 | ) 86 | self.ui_service.display_transaction( 87 | tx, top_labels, top_probs, chatgpt_prediction 88 | ) 89 | 90 | selected_category = self.get_user_selection( 91 | top_labels, chatgpt_prediction, args 92 | ) 93 | if selected_category is None: 94 | return "quit" 95 | elif selected_category: 96 | narration = self.get_user_narration() 97 | self.update_transaction(tx, index, txs, selected_category, narration) 98 | amount = tx.postings[0].units.number 99 | if selected_category != args.rules.default_expense: 100 | if self.model is not None: 101 | self.model.update_training_data( 102 | tx.date, 103 | stripped_text, 104 | amount, 105 | selected_category, 106 | day_of_month, 107 | day_of_week, 108 | ) 109 | else: 110 | row = pd.DataFrame( 111 | { 112 | "date": [tx.date], 113 | "desc": [stripped_text], 114 | "amount": [amount], 115 | "cat": [selected_category], 116 | } 117 | ) 118 | DataLoader.add_training_row(self, self.trainingDataFile, row) 119 | 120 | return "continue" 121 | 122 | def get_user_narration(self): 123 | narration = input( 124 | "Enter a comment for the transaction (press Enter to skip): " 125 | ).strip() 126 | return narration if narration else None 127 | 128 | def get_user_selection(self, top_labels, chatgpt_prediction, args): 129 | options = len(top_labels) + (1 if chatgpt_prediction else 0) 130 | if options == 0: 131 | return self.handle_custom_input(args) 132 | 133 | while True: 134 | selected_number = input( 135 | f"Enter your selection (1-{options}, or 'Enter' to choose the category, 'q' to quit): " 136 | ) 137 | if selected_number.lower() == "q": 138 | return None 139 | if selected_number.isdigit(): 140 | return self.handle_numeric_selection( 141 | int(selected_number), top_labels, chatgpt_prediction 142 | ) 143 | return self.handle_custom_input(args) 144 | 145 | def handle_numeric_selection(self, selected_number, top_labels, chatgpt_prediction): 146 | if chatgpt_prediction and selected_number == len(top_labels) + 1: 147 | return chatgpt_prediction 148 | elif 1 <= selected_number <= len(top_labels): 149 | return top_labels[selected_number - 1] 150 | return None 151 | 152 | def handle_custom_input(self, args): 153 | accounts = JournalUtils().get_accounts(args.rules.bc_file) 154 | account_completer = CustomFuzzyWordCompleter(accounts) 155 | kb = self.create_key_bindings() 156 | selected_category = prompt( 157 | "Enter account: ", 158 | completer=account_completer, 159 | complete_while_typing=True, 160 | key_bindings=kb, 161 | default=args.rules.default_expense, 162 | ) 163 | if selected_category not in accounts: 164 | print( 165 | "[bold red]Invalid account. 
Please select a valid account.[/bold red]" 166 | ) 167 | return self.handle_custom_input(args) 168 | return selected_category 169 | 170 | def create_key_bindings(self): 171 | kb = KeyBindings() 172 | 173 | @kb.add(Keys.Backspace) 174 | def _(event): 175 | event.current_buffer.delete_before_cursor(count=1) 176 | event.current_buffer.start_completion(select_first=False) 177 | 178 | return kb 179 | 180 | def update_transaction(self, tx, index, txs, category, narration=None): 181 | posting = Posting(category, None, None, None, None, None) 182 | new_postings = [tx.postings[0]] + [posting] 183 | new_tx = tx._replace(postings=new_postings) 184 | if narration: 185 | new_tx = new_tx._replace(narration=narration) 186 | txs.getTransactions()[index] = new_tx 187 | 188 | def classify(self, txs, args): 189 | if not self.confirm_classification(txs, args): 190 | return 191 | 192 | for i, tx in enumerate(txs.getTransactions()): 193 | if self.has_no_category(tx, args): 194 | result = self.process_transaction(tx, i, txs, args) 195 | if result == "quit": 196 | break 197 | -------------------------------------------------------------------------------- /beanborg/classification/custom_fuzzy_wordf_completer.py: -------------------------------------------------------------------------------- 1 | from prompt_toolkit.completion import Completion, FuzzyWordCompleter 2 | 3 | 4 | class CustomFuzzyWordCompleter(FuzzyWordCompleter): 5 | def get_completions(self, document, complete_event): 6 | word_before_cursor = document.get_word_before_cursor(WORD=True) 7 | for word in self.words: 8 | if word.lower().startswith(word_before_cursor.lower()): 9 | yield Completion(word, start_position=-len(word_before_cursor)) 10 | -------------------------------------------------------------------------------- /beanborg/classification/data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | 5 | 6 | class DataLoader: 7 | @staticmethod 8 | def load_data(filepath: str) -> pd.DataFrame: 9 | 10 | expanded_filepath = os.path.expanduser(filepath) 11 | if not os.path.exists(expanded_filepath): 12 | os.makedirs(os.path.dirname(expanded_filepath), exist_ok=True) 13 | with open(expanded_filepath, "w") as f: 14 | f.write("date,desc,amount,cat\n") 15 | 16 | data = pd.read_csv(filepath) 17 | data["date"] = pd.to_datetime(data["date"], format="%Y-%m-%d") 18 | data["day_of_month"] = pd.to_datetime(data["date"], errors="coerce").dt.day 19 | data["day_of_week"] = pd.to_datetime(data["date"], errors="coerce").dt.dayofweek 20 | data["desc"] = data["desc"].astype(str) 21 | return data 22 | 23 | @staticmethod 24 | def add_training_row(self, filepath: str, row: pd.Series): 25 | expanded_filepath = os.path.expanduser(filepath) 26 | if os.path.exists(expanded_filepath): 27 | data = pd.read_csv(filepath) 28 | data = pd.concat([data, row], ignore_index=True) 29 | data.to_csv(filepath, index=False) 30 | -------------------------------------------------------------------------------- /beanborg/classification/gpt_service.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from openai import AuthenticationError, OpenAI 4 | 5 | 6 | class GPTService: 7 | def __init__(self, use_llm: bool): 8 | if use_llm: 9 | try: 10 | self.client = OpenAI() 11 | # Test the API key by making a simple request 12 | self.client.models.list() 13 | except AuthenticationError: 14 | self.client = None 15 | print("OpenAI API key is invalid or 
not set.") 16 | except Exception as e: 17 | self.client = None 18 | print(f"Failed to initialize OpenAI client: {str(e)}") 19 | 20 | def query_gpt_for_label(self, description: str, labels: List[str]) -> str: 21 | if not self.client: 22 | return "OpenAI not available" 23 | 24 | try: 25 | response = self.client.chat.completions.create( 26 | model="gpt-4", 27 | messages=[ 28 | { 29 | "role": "system", 30 | "content": "You are 'TransactionBud' a helpful and concise utility designed to categorize bank transactions efficiently. Your primary function is to assign a category to each transaction presented to you", 31 | }, 32 | { 33 | "role": "user", 34 | "content": f"Given the description '{description}', what would be the most appropriate category among the following: {', '.join(labels)}? Only output the category name without any additional text.", 35 | }, 36 | ], 37 | temperature=0.7, 38 | top_p=1, 39 | ) 40 | return response.choices[0].message.content 41 | except Exception as e: 42 | print(f"Failed to query GPT: {str(e)}") 43 | return "OpenAI not available" 44 | -------------------------------------------------------------------------------- /beanborg/classification/transaction_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from imblearn.over_sampling import SMOTE 6 | from imblearn.pipeline import Pipeline as ImbPipeline 7 | from imblearn.pipeline import make_pipeline 8 | from sklearn.compose import ColumnTransformer 9 | from sklearn.feature_extraction.text import CountVectorizer 10 | from sklearn.neighbors import KNeighborsClassifier 11 | from sklearn.preprocessing import LabelEncoder, StandardScaler 12 | 13 | 14 | class TransactionModel: 15 | def __init__(self, training_data, data_file): 16 | self.training_data = training_data 17 | self.data_file = data_file 18 | self._create_and_fit_model() 19 | 20 | def _remove_single_sample_classes(self, X, y): 21 | class_counts = y.value_counts() 22 | classes_to_keep = class_counts[class_counts >= 2].index 23 | mask = y.isin(classes_to_keep) 24 | return X[mask], y[mask] 25 | 26 | def _create_and_fit_model(self): 27 | X = self.training_data[["desc", "day_of_month", "day_of_week"]] 28 | y = self.training_data["cat"] 29 | 30 | # Remove classes with only one sample 31 | X, y = self._remove_single_sample_classes(X, y) 32 | 33 | # Encode target labels 34 | self.label_encoder = LabelEncoder() 35 | y_encoded = self.label_encoder.fit_transform(y) 36 | 37 | # Create feature processing pipeline 38 | feature_pipeline = ColumnTransformer( 39 | [ 40 | ( 41 | "text", 42 | make_pipeline( 43 | CountVectorizer(analyzer=str.split), # Removed token_pattern 44 | StandardScaler(with_mean=False), 45 | ), 46 | "desc", 47 | ), 48 | ("num", StandardScaler(), ["day_of_month", "day_of_week"]), 49 | ] 50 | ) 51 | 52 | # Create KNN classifier 53 | n_neighbors = min(5, len(y) - 1) 54 | knn = KNeighborsClassifier(n_neighbors=n_neighbors) 55 | 56 | # Create pipeline with SMOTE 57 | self.model = ImbPipeline( 58 | [ 59 | ("features", feature_pipeline), 60 | ("smote", SMOTE(k_neighbors=min(5, min(y.value_counts()) - 1))), 61 | ("classifier", knn), 62 | ] 63 | ) 64 | 65 | # Fit the model 66 | self.model.fit(X, y_encoded) 67 | 68 | def predict(self, text, day_of_month, day_of_week, n=3): 69 | # Create a DataFrame for the input text with the same structure as the training data 70 | data = { 71 | "desc": [text], 72 | "day_of_month": [day_of_month], 73 | "day_of_week": [day_of_week], 
74 | } 75 | input_df = pd.DataFrame(data) 76 | 77 | # Predict the probabilities for the input DataFrame 78 | probs = self.model.predict_proba(input_df) 79 | 80 | # Get the indices of the top n probabilities 81 | top_indices = np.argsort(probs[0])[-n:][::-1] 82 | 83 | # Map indices to class labels and probabilities 84 | top_classes = self.label_encoder.classes_[top_indices] 85 | top_probabilities = probs[0][top_indices] 86 | 87 | return top_classes, top_probabilities 88 | 89 | def update_training_data( 90 | self, date, description, amount, category, day_of_month, day_of_week 91 | ): 92 | """ 93 | Updates the training data with a new or existing entry and retrains the model. 94 | """ 95 | 96 | tokenized_description = self._tokenize_description(description) 97 | 98 | # Check if the description already exists 99 | existing_entry = self.training_data[ 100 | self.training_data["desc"] == tokenized_description 101 | ] 102 | 103 | if not existing_entry.empty: 104 | existing_category = existing_entry["cat"].iloc[0] 105 | 106 | if existing_category != category: 107 | # Conflict found: Ask user how to handle the conflicting category 108 | self._handle_existing_entry_conflict( 109 | tokenized_description, 110 | existing_category, 111 | date, 112 | amount, 113 | category, 114 | day_of_month, 115 | day_of_week, 116 | ) 117 | else: 118 | # Entry already exists with the same category, no update needed 119 | print( 120 | f"Entry already exists with category '{existing_category}'. Skipping update." 121 | ) 122 | return 123 | else: 124 | # Add a new entry 125 | self._add_new_entry( 126 | date, tokenized_description, amount, category, day_of_month, day_of_week 127 | ) 128 | print(f"New entry added: '{description}' with category '{category}'.") 129 | 130 | # Append the new data to the CSV file instead of rewriting it entirely 131 | self._append_to_csv( 132 | date, tokenized_description, amount, category, day_of_month, day_of_week 133 | ) 134 | 135 | self._create_and_fit_model() 136 | 137 | def _append_to_csv( 138 | self, date, description, amount, category, day_of_month, day_of_week 139 | ): 140 | """ 141 | Append the new entry to the CSV file without overwriting the whole file. 142 | Ensures a newline is present before appending the new entry. 143 | """ 144 | new_data = pd.DataFrame( 145 | { 146 | "date": [date], 147 | "desc": [description], 148 | "amount": [amount], 149 | "cat": [category], 150 | } 151 | ) 152 | 153 | # Check if the file already exists 154 | file_exists = os.path.isfile(self.data_file) 155 | 156 | # Ensure there's a newline at the end of the file before appending new data 157 | if file_exists: 158 | with open(self.data_file, "rb+") as f: 159 | f.seek(-1, os.SEEK_END) # Move to the last byte 160 | last_char = f.read(1) 161 | if last_char != b"\n": # Check if the last character is a newline 162 | f.write(b"\n") # If not, add a newline 163 | 164 | # Now append the new data 165 | new_data.to_csv(self.data_file, mode="a", header=False, index=False) 166 | 167 | def _handle_existing_entry_conflict( 168 | self, 169 | description, 170 | existing_category, 171 | date, 172 | amount, 173 | new_category, 174 | day_of_month, 175 | day_of_week, 176 | ): 177 | """ 178 | Handle the case where an entry with the same description exists but has a different category. 179 | Allows the user to choose between updating, adding a new entry, or skipping. 180 | """ 181 | print( 182 | f"Description '{description}' already exists with category '{existing_category}'." 
183 | ) 184 | action = input( 185 | "Choose action:\n" 186 | "1. Update existing entry\n" 187 | "2. Add new entry\n" 188 | "3. Skip update\n" 189 | "Enter choice (1/2/3): " 190 | ) 191 | 192 | if action == "1": 193 | # Update the existing entry with the new category 194 | self.training_data.loc[self.training_data["desc"] == description, "cat"] = ( 195 | new_category 196 | ) 197 | print( 198 | f"Existing entry for '{description}' updated to category '{new_category}'." 199 | ) 200 | elif action == "2": 201 | # Add a new entry despite the conflict 202 | self._add_new_entry( 203 | date, description, amount, new_category, day_of_month, day_of_week 204 | ) 205 | print( 206 | f"New entry added for '{description}' with category '{new_category}'." 207 | ) 208 | else: 209 | # Skip the update process 210 | print("Update skipped.") 211 | 212 | def _add_new_entry( 213 | self, date, description, amount, category, day_of_month, day_of_week 214 | ): 215 | """ 216 | Add a new entry to the training data. 217 | """ 218 | new_data = pd.DataFrame( 219 | { 220 | "date": [date], 221 | "desc": [description], 222 | "amount": [amount], 223 | "cat": [category], 224 | "day_of_month": [day_of_month], 225 | "day_of_week": [day_of_week], 226 | } 227 | ) 228 | self.training_data = pd.concat( 229 | [self.training_data, new_data], ignore_index=True 230 | ) 231 | 232 | def _tokenize_description(self, description): 233 | """ 234 | Tokenize the description using CountVectorizer. 235 | """ 236 | vectorizer = CountVectorizer(analyzer=str.split) 237 | tokens = vectorizer.build_analyzer()(description) 238 | return " ".join(tokens) 239 | -------------------------------------------------------------------------------- /beanborg/classification/ui_service.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from beancount.parser.printer import format_entry 4 | from rich import box 5 | from rich.console import Console 6 | from rich.panel import Panel 7 | from rich.syntax import Syntax 8 | 9 | 10 | class UIService: 11 | @staticmethod 12 | def display_transaction( 13 | tx, top_labels: List[str], top_probs: List[float], chatgpt_prediction: str 14 | ): 15 | console = Console() 16 | console.clear() 17 | 18 | # Convert the transaction to a string and apply syntax highlighting 19 | tx_str = format_entry(tx) 20 | highlighted_tx = Syntax(tx_str, "python", theme="monokai", line_numbers=False) 21 | 22 | tx_panel = Panel( 23 | highlighted_tx, 24 | title="Transaction", 25 | width=80, 26 | expand=False, 27 | border_style="cyan", 28 | box=box.ROUNDED, 29 | ) 30 | 31 | predictions_content = ["Top 3 predictions:"] 32 | for i, (label, prob) in enumerate(zip(top_labels, top_probs), 1): 33 | predictions_content.append( 34 | f"[bold cyan]{i}.[/] [cyan]{label}[/] ({prob:.2f})" 35 | ) 36 | if chatgpt_prediction: 37 | predictions_content.append( 38 | f"[bold cyan]{len(top_labels) + 1}.[/] ChatGPT: [cyan]{chatgpt_prediction}[/]" 39 | ) 40 | 41 | console.print(tx_panel) 42 | 43 | # Only print the predictions panel if there are predictions to show 44 | if predictions_content and (len(predictions_content) > 1 or chatgpt_prediction): 45 | pred_panel = Panel( 46 | "\n".join(predictions_content), 47 | title="Predictions", 48 | width=80, 49 | expand=False, 50 | border_style="magenta", 51 | box=box.ROUNDED, 52 | ) 53 | console.print(pred_panel) 54 | -------------------------------------------------------------------------------- /beanborg/config.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import yaml 5 | 6 | 7 | class Rules: 8 | def __init__( 9 | self, 10 | bc_file=None, 11 | rules_folder=None, 12 | account=None, 13 | currency=None, 14 | default_expense=None, 15 | force_negative=None, 16 | invert_negative=None, 17 | origin_account=None, 18 | ruleset=[], 19 | advanced_duplicate_detection=None, 20 | training_data=None, 21 | use_llm=None, 22 | ): 23 | self.bc_file = bc_file 24 | self.rules_folder = rules_folder 25 | self.account = account 26 | self.currency = currency 27 | self.default_expense = default_expense 28 | self.force_negative = force_negative 29 | self.invert_negative = invert_negative 30 | self.origin_account = origin_account 31 | self.ruleset = ruleset 32 | self.advanced_duplicate_detection = advanced_duplicate_detection 33 | self.training_data = training_data 34 | self.use_llm = use_llm 35 | 36 | 37 | class Indexes: 38 | def __init__( 39 | self, 40 | date=None, 41 | counterparty=None, 42 | amount=None, 43 | account=None, 44 | currency=None, 45 | tx_type=None, 46 | amount_in=None, 47 | narration=None, 48 | ): 49 | self.date = date 50 | self.counterparty = counterparty 51 | self.amount = amount 52 | self.account = account 53 | self.currency = currency 54 | self.tx_type = tx_type 55 | self.amount_in = amount_in 56 | self.narration = narration 57 | 58 | 59 | class Csv: 60 | def __init__( 61 | self, 62 | download_path, 63 | name, 64 | ref, 65 | separator=None, 66 | date_format=None, 67 | skip=None, 68 | target=None, 69 | archive=None, 70 | post_script_path=None, 71 | keep_original=None, 72 | ): 73 | self.download_path = download_path 74 | self.name = name 75 | self.ref = ref 76 | self.separator = separator 77 | self.date_format = date_format 78 | self.skip = skip 79 | self.target = target 80 | self.archive = archive 81 | self.post_script_path = post_script_path 82 | self.keep_original = keep_original 83 | 84 | 85 | class Config: 86 | def __init__(self, csv, indexes, rules, debug=False): 87 | self.csv = csv 88 | self.indexes = indexes 89 | self.rules = rules 90 | self.debug = debug 91 | 92 | def load(loader, node): 93 | values = loader.construct_mapping(node, deep=True) 94 | 95 | csv_data = values["csv"] 96 | 97 | csv = Csv( 98 | csv_data["download_path"], 99 | csv_data["name"], 100 | csv_data["bank_ref"], 101 | csv_data.get("separator", ","), 102 | csv_data["date_format"], 103 | csv_data.get("skip", 1), 104 | csv_data.get("target", "tmp"), 105 | csv_data.get("archive_path", "archive"), 106 | csv_data.get("post_move_script"), 107 | csv_data.get("keep_original", False), 108 | ) 109 | 110 | idx = values.get("indexes", dict()) 111 | 112 | indexes = Indexes( 113 | idx.get("date", 0), 114 | idx.get("counterparty", 3), 115 | idx.get("amount", 4), 116 | idx.get("account", 1), 117 | idx.get("currency", 5), 118 | idx.get("tx_type", 2), 119 | idx.get("amount_in", None), 120 | idx.get("narration", None), 121 | ) 122 | 123 | rls = values.get("rules", dict()) 124 | 125 | rules = Rules( 126 | rls.get("beancount_file", "main.ldg"), 127 | rls.get("rules_folder", "rules"), 128 | rls.get("account", None), 129 | rls.get("currency", None), 130 | rls.get("default_expense", "Expenses:Unknown"), 131 | rls.get("force_negative", False), 132 | rls.get("invert_negative", False), 133 | rls.get("origin_account", None), 134 | rls.get("ruleset", []), 135 | rls.get("advanced_duplicate_detection", True), 136 | rls.get("training_data", "training_data.csv"), 137 | rls.get("use_llm", 
False), 138 | ) 139 | 140 | return Config(csv, indexes, rules) 141 | 142 | 143 | def init_config(file, debug): 144 | 145 | yaml.add_constructor("!Config", Config.load) 146 | 147 | if not os.path.isfile(file): 148 | print("file: %s does not exist!" % (file)) 149 | sys.exit(-1) 150 | 151 | with open(file, "r") as file: 152 | try: 153 | config = yaml.load(file, Loader=yaml.FullLoader) 154 | except yaml.scanner.ScannerError: 155 | print("file: %s is malformed, please check" % (file.name)) 156 | sys.exit(-1) 157 | 158 | config.debug = debug 159 | return config 160 | -------------------------------------------------------------------------------- /beanborg/handlers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/handlers/__init__.py -------------------------------------------------------------------------------- /beanborg/handlers/amount_handler.py: -------------------------------------------------------------------------------- 1 | __copyright__ = "Copyright (C) 2022 Luciano Fiandesio" 2 | __license__ = "GNU GPLv2" 3 | 4 | from beancount.core.number import D 5 | 6 | 7 | class AmountHandler: 8 | 9 | # create mapping tables for currency conversion 10 | sign_trans = str.maketrans({"$": "", " ": ""}) # remove $ and space 11 | dot_trans = str.maketrans({".": "", ",": ""}) # remove . and , 12 | 13 | def handle(self, val, args): 14 | 15 | if args.indexes.amount_in: 16 | return self.__convert(val[args.indexes.amount_in].strip()) - self.__convert( 17 | val 18 | ) 19 | 20 | if args.rules.invert_negative and val[0] == "-": 21 | val = val.replace("-", "+") 22 | 23 | if args.rules.force_negative == 1 and val[0].isdigit(): 24 | val = "-" + val 25 | 26 | return self.__convert(val) 27 | 28 | def __convert(self, num, sign_trans=sign_trans, dot_trans=dot_trans): 29 | """ 30 | Converts the given string into a decimal, where the last 31 | two digits are always assumed to be the decimals: 32 | 33 | "22 000,76" -> 22000.76 34 | "22.000,76" -> 22000.76 35 | "22,000.76" -> 22000.76 36 | "1022000,76" -> 1022000.76 37 | "-1,022,000.76", -> -1022000.76 38 | "1022000", -> 1022000.0 39 | "22 000,76$", -> 22000.76 40 | "$22 000,76" -> 22000.76 41 | 42 | """ 43 | 44 | num = num.translate(sign_trans) 45 | num = num[:-3].translate(dot_trans) + num[-3:] 46 | return D(num.replace(",", ".")) 47 | -------------------------------------------------------------------------------- /beanborg/importer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import csv 3 | import os 4 | import re 5 | import sys 6 | import traceback 7 | from dataclasses import dataclass 8 | from datetime import datetime, timedelta 9 | from random import SystemRandom 10 | 11 | from beancount.core.data import Amount 12 | from beancount.parser.printer import format_entry 13 | from rich import print as rprint 14 | from rich.table import Table 15 | 16 | from beanborg.arg_parser import eval_args 17 | from beanborg.classification.classifier import Classifier 18 | from beanborg.config import init_config 19 | from beanborg.handlers.amount_handler import AmountHandler 20 | from beanborg.model.transactions import Transactions 21 | from beanborg.rule_engine.Context import Context 22 | from beanborg.rule_engine.rules_engine import RuleEngine 23 | from beanborg.utils.duplicate_detector import ( 24 | hash_tuple, 25 | init_duplication_store, 26 | 
print_duplication_warning, 27 | to_tuple, 28 | ) 29 | from beanborg.utils.hash_utils import hash 30 | from beanborg.utils.journal_utils import JournalUtils 31 | 32 | 33 | @dataclass 34 | class ImportStats: 35 | tx_in_file: int = 0 36 | processed: int = 0 37 | error: int = 0 38 | no_category: int = 0 39 | hash_collision: int = 0 40 | ignored_by_rule: int = 0 41 | skipped_by_user: int = 0 42 | 43 | 44 | class Importer: 45 | """ 46 | Initialize the import rule engine using the arguments from 47 | the configuration file 48 | """ 49 | 50 | def debug(self): 51 | """check if the importer is started using the debug flag 52 | 53 | Returns: 54 | boolean: is debug 55 | """ 56 | return self.args.debug 57 | 58 | def log_error(self, row): 59 | """simple error logger""" 60 | print(f'CSV: {",".join(row)}') 61 | rprint("-" * 80) 62 | 63 | def __init__(self): 64 | self.stats = ImportStats() 65 | self.args = None 66 | self.accounts = set() 67 | self.txs = Transactions({}) 68 | 69 | def gen_datetime(self, min_year=1900, max_year=datetime.now().year): 70 | """generate a datetime in format yyyy-mm-dd hh:mm:ss.000000""" 71 | start = datetime(min_year, 1, 1, 00, 00, 00) 72 | years = max_year - min_year + 1 73 | end = start + timedelta(days=365 * years) 74 | return start + (end - start) * SystemRandom.random(self) 75 | 76 | def init_rule_engine(self): 77 | """ 78 | Initialize the import rule engine using the arguments from 79 | the configuration file 80 | """ 81 | 82 | folder = self.args.rules.rules_folder 83 | 84 | if ( 85 | len(self.args.rules.ruleset) > 1 86 | and not os.path.isfile(folder + "/asset.rules") 87 | and self.args.rules.account is None 88 | and self.args.rules.origin_account is None 89 | ): 90 | 91 | rprint( 92 | "[red]Please specify an account in your config file " 93 | "or create an entry in the asset.rules file[/red]" 94 | ) 95 | sys.exit(-1) 96 | 97 | return RuleEngine( 98 | Context( 99 | date_fomat=self.args.csv.date_format, 100 | default_expense=self.args.rules.default_expense, 101 | date_pos=self.args.indexes.date, 102 | payee_pos=self.args.indexes.counterparty, 103 | tx_type_pos=self.args.indexes.tx_type, 104 | account_pos=self.args.indexes.account, 105 | narration_pos=self.args.indexes.narration, 106 | account=self.args.rules.account, 107 | ruleset=self.args.rules.ruleset, 108 | rules_dir=folder, 109 | force_account=self.args.rules.origin_account, 110 | debug=self.args.debug, 111 | ) 112 | ) 113 | 114 | def print_summary(self): 115 | table = Table(title="Import Summary") 116 | table.add_column("Counter", style="magenta") 117 | table.add_column("Value", style="green", justify="right") 118 | table.add_row("csv tx count", str(self.stats.tx_in_file)) 119 | table.add_row("imported", str(self.stats.processed)) 120 | table.add_row("tx already present", str(self.stats.hash_collision)) 121 | table.add_row("tx ignored by rule", str(self.stats.ignored_by_rule)) 122 | table.add_row("tx skipped by user", str(self.stats.skipped_by_user)) 123 | 124 | if self.stats.error > 0: 125 | table.add_row("error", str(self.stats.error), style="red") 126 | else: 127 | table.add_row("error", str(self.stats.error)) 128 | table.add_row("tx without category", str(self.stats.no_category)) 129 | print("\n") 130 | rprint(table) 131 | 132 | def get_account(self, row): 133 | """get the account value for the given csv line 134 | or use the specified account 135 | """ 136 | if self.args.rules.account: 137 | return self.args.rules.account 138 | 139 | return row[self.args.indexes.account] 140 | 141 | def get_currency(self, row): 
142 | """get the currency value for the given csv line or 143 | use the specified currency 144 | """ 145 | if self.args.rules.currency: 146 | return self.args.rules.currency 147 | return row[self.args.indexes.currency] 148 | 149 | def warn_hash_collision(self, row, md5): 150 | rprint( 151 | "[red]warning[/red]: " 152 | "a transaction with identical hash exists in " 153 | "the journal: " 154 | f"[bold]{md5}[/bold]" 155 | ) 156 | self.log_error(row) 157 | self.stats.hash_collision += 1 158 | 159 | def fetch_account_transactions(self, account): 160 | 161 | account_file = account + ".ldg" 162 | account_tx = ( 163 | init_duplication_store(account_file, self.args.rules.bc_file) 164 | if self.args.rules.advanced_duplicate_detection 165 | else {} 166 | ) 167 | return account_tx 168 | 169 | def verify_accounts_count(self): 170 | if len(self.accounts) > 1 and len(self.transactions) > 0: 171 | rprint( 172 | "[red]Expecting only one account in csv" 173 | f"file, found: {str(len(self.accounts))}[/red]" 174 | ) 175 | 176 | def verify_unique_transactions(self, account): 177 | 178 | account_txs = self.fetch_account_transactions(account) 179 | pre_trans = [] 180 | for key in sorted(self.txs.getTransactions()): 181 | # check if the transaction being imported matches another 182 | # existing transaction 183 | # in the current ledger file. 184 | tup = to_tuple(self.txs.getTransactions()[key]) 185 | if hash_tuple(tup) in account_txs: 186 | if print_duplication_warning(account_txs[hash_tuple(tup)]): 187 | pre_trans.append(self.txs[key]) 188 | else: 189 | pre_trans.append(self.txs.getTransactions()[key]) 190 | 191 | return Transactions(pre_trans) 192 | 193 | def write_tx(self, file_handler, tx): 194 | file_handler.write(format_entry(tx) + "\n") 195 | 196 | def write_to_ledger(self, account_file, transactions): 197 | 198 | with open(account_file, "a") as exc: 199 | for tx in transactions: 200 | self.write_tx(exc, tx) 201 | 202 | def fix_uncategorized_tx(self): 203 | """ 204 | Fix uncategorized transactions in the ledger file. 
205 | """ 206 | 207 | # Get target account 208 | account = self.args.rules.account 209 | txs = JournalUtils().get_transactions_by_account_name( 210 | self.args.rules.bc_file, account 211 | ) 212 | # Get the filename of the first transaction 213 | filename = txs[0].meta["filename"] 214 | 215 | # filter out txs that have already been categorized 216 | txs = Transactions( 217 | [ 218 | tx 219 | for tx in txs 220 | if tx.postings[1].account == self.args.rules.default_expense 221 | ] 222 | ) 223 | Classifier( 224 | self.args.rules.training_data, 225 | self.args.rules.use_llm, 226 | self.args.rules.bc_file, 227 | ).classify(txs, self.args) 228 | 229 | with open(filename, "r") as file: 230 | content = file.read() 231 | for tx in txs.getTransactions(): 232 | self.update_transaction( 233 | content, filename, tx.meta["md5"], tx.postings[1].account 234 | ) 235 | 236 | def update_transaction(self, ledger_content, ledger_file, md5, new_category): 237 | 238 | # Find the transaction block with the given md5 239 | pattern = rf'(.*?md5: "{md5}".*?Expenses:Unknown.*?\n\n)' 240 | match = re.search(pattern, ledger_content, re.DOTALL) 241 | 242 | if match: 243 | transaction_block = match.group(1) 244 | 245 | # Replace 'Expenses:Unknown' with the new category 246 | updated_block = re.sub( 247 | r"( Expenses:Unknown)", f" {new_category}", transaction_block 248 | ) 249 | 250 | # Replace the old block with the updated one 251 | updated_content = ledger_content.replace(transaction_block, updated_block) 252 | 253 | # Write the updated content back to the file 254 | with open(ledger_file, "w") as file: 255 | file.write(updated_content) 256 | else: 257 | print(f"Skipping transaction with md5 {md5} not found.") 258 | 259 | def import_transactions(self): 260 | 261 | options = eval_args("Parse bank csv file and import into beancount") 262 | self.args = init_config(options.file, options.debug) 263 | 264 | if options.fix_only: 265 | self.fix_uncategorized_tx() 266 | return 267 | 268 | # transactions csv file to import 269 | import_csv = os.path.join(self.args.csv.target, f"{self.args.csv.ref}.csv") 270 | 271 | if not os.path.isfile(import_csv): 272 | rprint("[red]file: %s does not exist![red]" % (import_csv)) 273 | sys.exit(-1) 274 | 275 | rule_engine = self.init_rule_engine() 276 | tx_hashes = JournalUtils().transaction_hashes(self.args.rules.bc_file) 277 | 278 | with open(import_csv) as csv_file: 279 | csv_reader = csv.reader(csv_file, delimiter=self.args.csv.separator) 280 | for _ in range(self.args.csv.skip): 281 | next(csv_reader) # skip the line 282 | for row in csv_reader: 283 | self.stats.tx_in_file += 1 284 | try: 285 | # calculate hash of csv row 286 | md5 = hash(row) 287 | 288 | # keep track of the accounts for each tx: 289 | # the system expects one account per imported file 290 | res_account = self.get_account(row) 291 | if self.debug(): 292 | print("resolved account: " + str(res_account)) 293 | self.accounts.add(res_account) 294 | 295 | if md5 not in tx_hashes: 296 | self.process_tx(row, md5, rule_engine) 297 | else: 298 | self.warn_hash_collision(row, md5) 299 | 300 | except Exception as e: 301 | print("error: " + str(e)) 302 | self.log_error(row) 303 | self.stats.error += 1 304 | if self.debug(): 305 | traceback.print_exc() 306 | 307 | self.verify_accounts_count() 308 | working_account = self.accounts.pop() 309 | filtered_txs = self.verify_unique_transactions(working_account) 310 | 311 | self.stats.skipped_by_user = self.txs.count() - filtered_txs.count() 312 | self.stats.processed = filtered_txs.count() 
313 | 314 | if filtered_txs.count_no_category(self.args.rules.default_expense) > 0: 315 | Classifier( 316 | self.args.rules.training_data, 317 | self.args.rules.use_llm, 318 | self.args.rules.bc_file, 319 | ).classify(filtered_txs, self.args) 320 | 321 | # write transactions to file 322 | account_file = working_account + ".ldg" 323 | self.write_to_ledger(account_file, filtered_txs.getTransactions()) 324 | self.print_summary() 325 | 326 | def validate(self, tx): 327 | """ 328 | Handle the origin account: if the tx processed by the 329 | rules engin has no origin account, try to assign one 330 | from the property file: args.rules.origin_account 331 | """ 332 | if tx.postings[0].account is None: 333 | raise Exception( 334 | "Unable to resolve the origin account for this transaction, " 335 | "please check that the `Replace_Asset` rule " 336 | "is in use for this account or set the " 337 | " `origin_account` property " 338 | "in the config file." 339 | ) 340 | 341 | return tx 342 | 343 | def enrich(self, row, tx, tx_date, md5): 344 | 345 | tx_meta = {"csv": ",".join(row), "md5": md5} 346 | 347 | # replace date """ 348 | tx = tx._replace(date=str(tx_date.date())) 349 | 350 | # add md5 and csv """ 351 | tx = tx._replace(meta=tx_meta) 352 | 353 | # get a decimal, with the minus sign, 354 | # if it's an expense 355 | amount = AmountHandler().handle( 356 | row[self.args.indexes.amount].strip(), self.args 357 | ) 358 | # add units (how much was spent) 359 | new_posting = tx.postings[0]._replace( 360 | units=Amount(amount, self.get_currency(row)) 361 | ) 362 | tx = tx._replace(postings=[new_posting] + [tx.postings[1]]) 363 | 364 | # add narration 365 | if self.args.indexes.narration: 366 | tx = tx._replace(narration=row[self.args.indexes.narration].strip()) 367 | 368 | if self.debug(): 369 | print(tx) 370 | 371 | return tx 372 | 373 | def process_tx(self, row, md5, rule_engine): 374 | 375 | tx = rule_engine.execute(row) 376 | 377 | if tx: 378 | # check if the a category is assigned 379 | if tx.postings[1].account == self.args.rules.default_expense: 380 | self.stats.no_category += 1 381 | 382 | tx_date = datetime.strptime( 383 | row[self.args.indexes.date].strip(), self.args.csv.date_format 384 | ) 385 | 386 | tx = self.validate(self.enrich(row, tx, tx_date, md5)) 387 | 388 | # generate a key based on: 389 | # - the tx date 390 | # - a random time (tx time is not important, but date is!) 
391 | key = str(tx_date) + str(self.gen_datetime().time()) 392 | self.txs.getTransactions()[key] = tx 393 | 394 | else: 395 | self.stats.ignored_by_rule += 1 396 | -------------------------------------------------------------------------------- /beanborg/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/model/__init__.py -------------------------------------------------------------------------------- /beanborg/model/transactions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | class Transactions: 5 | __transactions = {} 6 | 7 | def __init__(self, transactions): 8 | self.__transactions = transactions 9 | 10 | def count_no_category(self, default_expense) -> int: 11 | txs = [] 12 | for tx in self.__transactions: 13 | if tx.postings[1].account == default_expense: 14 | txs.append(tx) 15 | 16 | return len(txs) 17 | 18 | def count(self) -> int: 19 | return len(self.__transactions) 20 | 21 | def getTransactions(self): 22 | return self.__transactions 23 | -------------------------------------------------------------------------------- /beanborg/rule_engine/Context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from dataclasses import dataclass 4 | 5 | 6 | @dataclass 7 | class Context: 8 | # ruleset 9 | ruleset: [] 10 | # custom rules folder 11 | rules_dir: str 12 | # the date format used in the CSV file 13 | date_fomat: str 14 | # the default account (Expense) to use for a the second "leg" of a 15 | # transaction 16 | default_expense: str 17 | # the index of the date field in the csv file 18 | date_pos: int 19 | # # the index of the counterparty field in the csv file 20 | payee_pos: int 21 | # the index of the transaction type field in the csv file 22 | tx_type_pos: int 23 | # the index of the account id field in the csv file 24 | account_pos: int 25 | # the index of the narration field in the csv file 26 | narration_pos: int 27 | # if the CSV file has no account id, use "account" to lookup the Account 28 | # Origin when using the Replace_Asset rule 29 | account: str 30 | # Force the Account Origin to the value specifed 31 | force_account: str 32 | # Output debug info 33 | debug: bool 34 | -------------------------------------------------------------------------------- /beanborg/rule_engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/rule_engine/__init__.py -------------------------------------------------------------------------------- /beanborg/rule_engine/decision_tables.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import csv 4 | import os 5 | 6 | 7 | def init_decision_table(file, debug=False): 8 | table = {} 9 | tablefile = os.path.join(os.getcwd(), file) 10 | if not os.path.isfile(tablefile) or os.stat(file).st_size == 0: 11 | if debug: 12 | print("The decision table file: " + file + " is missing or empty.") 13 | else: 14 | with open(tablefile) as csv_file: 15 | csv_reader = csv.reader(decomment(csv_file), delimiter=";") 16 | next(csv_reader) # skip first line 17 | for row in csv_reader: 18 | if any(row): 19 | if len(row) == 3: 20 | table[row[0]] = 
(row[1], row[2]) 21 | else: 22 | print("invalid rule: " + ", ".join(row)) 23 | return table 24 | 25 | 26 | def decomment(csvfile): 27 | for row in csvfile: 28 | raw = row.split("#")[0].strip() 29 | if raw: 30 | yield row 31 | 32 | 33 | def resolve_from_decision_table(table, string, default): 34 | 35 | eq_check_func = { 36 | "equals": _equals, 37 | "equals_ic": _equals_ignore_case, 38 | "startsWith": _startsWith, 39 | "endsWith": _endsWith, 40 | "contains": _contains, 41 | "contains_ic": _contains_ignore_case, 42 | "eq": _equals, 43 | "sw": _startsWith, 44 | "ew": _endsWith, 45 | "co": _contains, 46 | } 47 | for k in table.keys(): 48 | t = table[k] 49 | eq_check_type = t[0] 50 | # TODO: do not fail if string (equals, contains, etc does not match) 51 | if eq_check_func.get(eq_check_type)(string, k): 52 | return t[1] 53 | 54 | return default 55 | 56 | 57 | def _equals(string_a, string_b): 58 | return string_a == string_b 59 | 60 | 61 | def _equals_ignore_case(string_a, string_b): 62 | return string_a.casefold() == string_b.casefold() 63 | 64 | 65 | def _startsWith(string_a, string_b): 66 | return string_a.startswith(string_b) 67 | 68 | 69 | def _endsWith(string_a, string_b): 70 | return string_a.endswith(string_b) 71 | 72 | 73 | def _contains(string_a, string_b): 74 | return string_b in string_a 75 | 76 | 77 | def _contains_ignore_case(string_a, string_b): 78 | return string_b.casefold() in string_a.casefold() 79 | -------------------------------------------------------------------------------- /beanborg/rule_engine/rules.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import abc 4 | import fnmatch 5 | import os 6 | import sys 7 | 8 | from beancount.core.data import Posting 9 | 10 | from .Context import Context 11 | from .decision_tables import init_decision_table, resolve_from_decision_table 12 | 13 | 14 | class LookUpCache: 15 | """ 16 | Simple cache for lookup tables 17 | """ 18 | 19 | cache = dict() 20 | 21 | @staticmethod 22 | def init_decision_table(key, path): 23 | 24 | if key in LookUpCache.cache: 25 | return LookUpCache.cache[key] 26 | 27 | data = init_decision_table(path) 28 | LookUpCache.cache[key] = data 29 | return data 30 | 31 | 32 | class Rule: 33 | __metaclass__ = abc.ABCMeta 34 | 35 | def __init__(self, name: str, context: Context): 36 | self.name = name 37 | self.context = context 38 | 39 | @abc.abstractmethod 40 | def execute(self, csv_line, transaction=None, ruleDef=None): 41 | 42 | return 43 | 44 | def checkAccountFromTo(self, ruleDef): 45 | if ruleDef.get("from") is None or ruleDef.get("to") is None: 46 | raise Exception( 47 | "Account from and to required for rule: {rule}".format( 48 | rule=ruleDef.rule.__name__ 49 | ) 50 | ) 51 | 52 | def failIfAttributeMissing(self, ruleDef, attributeName): 53 | if ruleDef.get(attributeName) is None: 54 | raise Exception( 55 | "Attribute {attribute_name} required for rule: {rule} ".format( 56 | attribute_name=attributeName, rule=ruleDef.rule.__name__ 57 | ) 58 | ) 59 | 60 | 61 | class Set_Accounts(Rule): 62 | """ 63 | Assign a from/to asset or account to a transaction, depending on the 64 | value of a given cvs index. 65 | 66 | Rule attributes: 67 | name: rule name (Set_Accounts) 68 | from: asset or account 69 | to: asset or account 70 | csv_index: csv row index to analyze (base-0) 71 | csv_values: semicolon delimited list of strings. 72 | If any of the values matches the 73 | value at the csv row's index, the from/to values 74 | are assigned. 
75 | The string evaluation is case insensitive. 76 | 77 | Example: 78 | - name: Set_Accounts 79 | from: Assets:Bank1:Bob:Savings 80 | to: Account:Groceries 81 | csv_index: 4 82 | csv_values: superfood;super_food; 83 | 84 | """ 85 | 86 | def __init__(self, name, context): 87 | Rule.__init__(self, name, context) 88 | 89 | def execute(self, csv_line, tx, ruleDef=None): 90 | 91 | # current value at index for the current row 92 | # csv_field_val = csv_line[ruleDef.csv_index].lower() 93 | csv_field_val = csv_line[ruleDef.get("csv_index")].lower().strip() 94 | 95 | # values specified in the rule definition 96 | vals = ruleDef.get("csv_values").split(";") 97 | 98 | match = False 99 | for val in vals: 100 | # Use fnmatch to allow wildcard matching 101 | if fnmatch.fnmatch(csv_field_val, val.lower().strip()): 102 | match = True 103 | break 104 | 105 | if match: 106 | newPosting = [ 107 | Posting( 108 | account=ruleDef.get("from"), 109 | units=None, 110 | cost=None, 111 | price=None, 112 | flag=None, 113 | meta=None, 114 | ), 115 | Posting( 116 | account=ruleDef.get("to"), 117 | units=None, 118 | cost=None, 119 | price=None, 120 | flag=None, 121 | meta=None, 122 | ), 123 | ] 124 | 125 | return (True, tx._replace(postings=newPosting)) 126 | 127 | return (False, tx) 128 | 129 | 130 | class Replace_Payee(Rule): 131 | """ 132 | Replaces the name of the transaction counterparty 133 | (for instance: McDonald -> Mc Donald Restaurant) 134 | The rule file containing the substitution rules 135 | must be located in the rules folder and must be named "payee.rules" 136 | """ 137 | 138 | def __init__(self, name, context): 139 | Rule.__init__(self, name, context) 140 | 141 | def execute(self, csv_line, tx, ruleDef=None): 142 | table = os.path.join(self.context.rules_dir, "payee.rules") 143 | if not os.path.isfile(table): 144 | print( 145 | "file: %s does not exist! - The 'Replace_Payee' rules \ 146 | requires the payee.rules file." 147 | % (table) 148 | ) 149 | sys.exit(-1) 150 | 151 | return ( 152 | False, 153 | tx._replace( 154 | payee=resolve_from_decision_table( 155 | LookUpCache.init_decision_table("payee", table), 156 | csv_line[self.context.payee_pos], 157 | csv_line[self.context.payee_pos], 158 | ) 159 | ), 160 | ) 161 | 162 | 163 | class Replace_Asset(Rule): 164 | """ 165 | Assigns an account to a transaction, based on value of the 'account' index 166 | of a CSV file row. 167 | This rule is useful to assign the correct source account 168 | of a CSV transaction. 169 | 170 | The rule is based on the 'asset.rules' look-up file. 171 | If no 'asset.rules' file is found, the account 172 | will be resolved to "Assets:Unknown" or 173 | to the value of the property `rules.origin_account` of the config file. 174 | """ 175 | 176 | def __init__(self, name, context): 177 | Rule.__init__(self, name, context) 178 | 179 | def execute(self, csv_line, tx=None, ruleDef=None): 180 | 181 | asset = None 182 | table = os.path.join(self.context.rules_dir, "asset.rules") 183 | if self.context.force_account: 184 | asset = self.context.force_account 185 | else: 186 | if not os.path.isfile(table): 187 | print( 188 | "file: %s does not exist! - \ 189 | The 'Replace_Asset' rules requires the asset.rules \ 190 | file." 
191 | % (table) 192 | ) 193 | sys.exit(-1) 194 | 195 | asset = resolve_from_decision_table( 196 | LookUpCache.init_decision_table("asset", table), 197 | ( 198 | self.context.account 199 | if self.context.account is not None 200 | else csv_line[self.context.account_pos] 201 | ), 202 | "Assets:Unknown", 203 | ) 204 | 205 | if asset: 206 | posting = Posting(asset, None, None, None, None, None) 207 | new_postings = [posting] + [tx.postings[1]] 208 | return (False, tx._replace(postings=new_postings)) 209 | 210 | return (False, tx) 211 | 212 | 213 | class Replace_Expense(Rule): 214 | """ 215 | Categorizes a transaction by assigning the account 216 | extracted from a look-up table 217 | based on the 'payee_pos' index of a CSV file row. 218 | 219 | The rule is based on the 'payee.rules' look-up file. 220 | """ 221 | 222 | def __init__(self, name, context): 223 | Rule.__init__(self, name, context) 224 | 225 | def execute(self, csv_line, tx=None, ruleDef=None): 226 | table = os.path.join(self.context.rules_dir, "account.rules") 227 | 228 | if not os.path.isfile(table): 229 | print( 230 | "file: % s does not exist! - The 'Replace_Expense' rules \ 231 | requires the account.rules file." 232 | % (table) 233 | ) 234 | sys.exit(-1) 235 | 236 | expense = resolve_from_decision_table( 237 | LookUpCache.init_decision_table("account", table), 238 | csv_line[self.context.payee_pos], 239 | self.context.default_expense, 240 | ) 241 | if expense: 242 | posting = Posting(expense, None, None, None, None, None) 243 | new_postings = [tx.postings[0]] + [posting] 244 | return (False, tx._replace(postings=new_postings)) 245 | 246 | return (False, tx) 247 | 248 | 249 | class Ignore_By_Payee(Rule): 250 | def __init__(self, name, context): 251 | Rule.__init__(self, name, context) 252 | 253 | def execute(self, csv_line, tx=None, ruleDef=None): 254 | 255 | self.failIfAttributeMissing(ruleDef, "ignore_payee") 256 | for ignorablePayee in ruleDef.get("ignore_payee"): 257 | if ignorablePayee.lower() in csv_line[self.context.payee_pos].lower(): 258 | return (True, None) 259 | 260 | return (False, tx) 261 | 262 | 263 | class Ignore_By_StringAtPos(Rule): 264 | """ 265 | Ignores a transaction based on the value of the specified index. 266 | 267 | For instance, given this csv entry: 268 | 269 | 10.12.2022,bp-fuel,20US$ 270 | 271 | and this rule: 272 | 273 | - name: Ignore_By_ContainsStringAtPos 274 | ignore_string_at_pos: 275 | - bp-fuel;1 276 | 277 | The row will be ignored, because the string "bp-fuel" matches 278 | the index at position 1. 279 | 280 | Example: 281 | - name: Ignore_By_StringAtPos 282 | ignore_string_at_pos: 283 | - val;3 284 | """ 285 | 286 | def __init__(self, name, context): 287 | Rule.__init__(self, name, context) 288 | 289 | def execute(self, csv_line, tx=None, ruleDef=None): 290 | 291 | self.failIfAttributeMissing(ruleDef, "ignore_string_at_pos") 292 | for ignorable in ruleDef.get("ignore_string_at_pos"): 293 | pos = int(ignorable.split(";")[1]) 294 | strToIgnore = ignorable.split(";")[0] 295 | 296 | if strToIgnore.lower().strip() == csv_line[pos].lower().strip(): 297 | return (True, None) 298 | 299 | return (False, tx) 300 | 301 | 302 | class Ignore_By_ContainsStringAtPos(Rule): 303 | """ 304 | Ignores a transaction if the specified value is present 305 | in the specified index. 
306 | For instance, given this csv entry: 307 | 308 | 10.12.2022,mega supermarket,20US$ 309 | 310 | and this rule: 311 | 312 | - name: Ignore_By_ContainsStringAtPos 313 | ignore_string_at_pos: 314 | - mega;1 315 | 316 | The row will be ignored, because the string "mega" is part of 317 | the index at position 1. 318 | 319 | Note that this rule supports multiple string specifications. 320 | 321 | Example: 322 | - name: Ignore_By_ContainsStringAtPos 323 | ignore_string_at_pos: 324 | - val;3 325 | - another val;6 326 | """ 327 | 328 | def __init__(self, name, context): 329 | Rule.__init__(self, name, context) 330 | 331 | def execute(self, csv_line, tx=None, ruleDef=None): 332 | 333 | self.failIfAttributeMissing(ruleDef, "ignore_string_contains_at_pos") 334 | 335 | for ignorable in ruleDef.get("ignore_string_contains_at_pos"): 336 | pos = int(ignorable.split(";")[1]) 337 | strToIgnore = ignorable.split(";")[0] 338 | if strToIgnore.lower() in csv_line[pos].lower(): 339 | return (True, None) 340 | -------------------------------------------------------------------------------- /beanborg/rule_engine/rules_engine.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import fnmatch 4 | import os 5 | import sys 6 | import uuid 7 | from dataclasses import dataclass 8 | from typing import Dict, List 9 | 10 | from beancount.core.data import Posting, Transaction 11 | 12 | from .Context import Context 13 | from .rules import * 14 | 15 | __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) 16 | 17 | 18 | @dataclass 19 | class RuleDef: 20 | rule: str 21 | attributes: Dict[str, List[str]] 22 | 23 | def get(self, key): 24 | return self.attributes[key] 25 | 26 | 27 | class Rule_Init(Rule): 28 | def __init__(self, name, context): 29 | Rule.__init__(self, name, context) 30 | 31 | def execute(self, csv_line, transaction=None): 32 | 33 | return ( 34 | False, 35 | Transaction( 36 | meta=None, 37 | date=None, 38 | flag="*", 39 | payee=None, 40 | narration=None, 41 | tags=None, 42 | links=None, 43 | postings=[ 44 | Posting( 45 | account=None, 46 | units=None, 47 | cost=None, 48 | price=None, 49 | flag=None, 50 | meta=None, 51 | ), 52 | Posting( 53 | account=None, 54 | units=None, 55 | cost=None, 56 | price=None, 57 | flag=None, 58 | meta=None, 59 | ), 60 | ], 61 | ), 62 | ) 63 | 64 | 65 | class RuleEngine: 66 | 67 | def handle(self, cr): 68 | 69 | return cr 70 | 71 | def __init__(self, ctx: Context): 72 | 73 | self._ctx = ctx 74 | self.rules = {} 75 | 76 | custom_rules = self.load_custom_rules() 77 | 78 | if self._ctx.ruleset is None: 79 | print( 80 | "\u26A0" 81 | + " no rules file spefified for this financial \ 82 | institution" 83 | ) 84 | self.rules = {} 85 | else: 86 | for yrule in self._ctx.ruleset: 87 | rule_props = {} 88 | for key in yrule: 89 | if key == "name": 90 | rule_name = yrule["name"] 91 | else: 92 | rule_props[key] = yrule.get(key) 93 | 94 | if rule_name in custom_rules: 95 | self.rules[rule_name] = RuleDef(custom_rules[rule_name], rule_props) 96 | else: 97 | unique_rule_name = rule_name + "|" + uuid.uuid4().hex.upper()[0:6] 98 | self.rules[unique_rule_name] = RuleDef( 99 | globals()[rule_name], rule_props 100 | ) 101 | # assign default rules, if they are not already specified 102 | if ctx.rules_dir and not self.is_rule_in_list("Replace_Asset"): 103 | self.rules["Replace_Asset"] = RuleDef(globals()["Replace_Asset"], None) 104 | 105 | def is_rule_in_list(self, name): 106 | for rule_name in self.rules: 
107 | if rule_name.startswith(name): 108 | return True 109 | 110 | return False 111 | 112 | def load_custom_rules(self): 113 | 114 | custom_rulez = {} 115 | if self._ctx.rules_dir is not None: 116 | custom_rules_path = os.path.join(os.getcwd(), self._ctx.rules_dir) 117 | if not os.path.isdir(custom_rules_path): 118 | if self._ctx.debug: 119 | print("Custom rules folder not found...ignoring") 120 | return custom_rulez 121 | sys.path.append(custom_rules_path) 122 | custom_rules = fnmatch.filter(os.listdir(custom_rules_path), "*.py") 123 | for r in custom_rules: 124 | mod_name = r[:-3] 125 | mod = __import__(mod_name, globals={}) 126 | class_ = getattr(mod, mod_name) 127 | # TODO check if custom rule is of type rule before adding 128 | custom_rulez[mod_name] = class_ 129 | 130 | return custom_rulez 131 | 132 | def execute(self, csv_line): 133 | 134 | final, tx = Rule_Init("init", self._ctx).execute(csv_line) 135 | 136 | for key in self.rules: 137 | if not final: 138 | if self._ctx.debug: 139 | print("Executing rule: " + str(self.rules[key].rule)) 140 | rulez = self.rules[key].rule(key, self._ctx) 141 | final, tx = rulez.execute(csv_line, tx, self.rules[key]) 142 | 143 | return tx 144 | -------------------------------------------------------------------------------- /beanborg/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/beanborg/utils/__init__.py -------------------------------------------------------------------------------- /beanborg/utils/duplicate_detector.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | from beancount import loader 4 | from beancount.core.data import Transaction 5 | from rich import print as rprint 6 | from rich.prompt import Confirm 7 | 8 | 9 | def hash_tuple(tuple): 10 | 11 | m = hashlib.md5() 12 | for s in tuple: 13 | m.update(str(s).encode("utf-8")) 14 | return m.hexdigest() 15 | 16 | 17 | def to_tuple(transaction): 18 | 19 | return (str(transaction.date), transaction.postings[0].units) 20 | 21 | 22 | def init_duplication_store(account, journal): 23 | """ 24 | Builds a map of existing transactions for the account being imported. 25 | Each map entry has an hash of the value as key and a tuple of 26 | transaction date and amount value. 27 | This map is used to report identical transactions being imported, 28 | should the standard hash based approach fail. 29 | """ 30 | transactions = {} 31 | entries, _, _ = loader.load_file(journal) 32 | for entry in entries: 33 | if isinstance(entry, Transaction) and entry.meta["filename"].endswith(account): 34 | tup = to_tuple(entry) 35 | transactions[hash_tuple(tup)] = tup 36 | 37 | return transactions 38 | 39 | 40 | def print_duplication_warning(tx): 41 | 42 | rprint( 43 | "[red]Warning[/red]: a transaction with identical date and" 44 | " amount already exists in the ledger. 
" 45 | f"\ndate: [bold]{tx[0]}[/bold]\namount [bold]{tx[1]}[/bold]" 46 | ) 47 | return Confirm.ask("Do you want to import it?") 48 | -------------------------------------------------------------------------------- /beanborg/utils/hash_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import hashlib 4 | 5 | 6 | def hash(csv_row): 7 | 8 | return hashlib.md5(",".join(csv_row).encode("utf-8")).hexdigest() 9 | -------------------------------------------------------------------------------- /beanborg/utils/journal_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from beancount import loader 3 | from beancount.core.data import Transaction 4 | from beancount.core.getters import get_accounts 5 | 6 | 7 | class JournalUtils: 8 | 9 | def get_entries(self, journal): 10 | """ 11 | Load in-memory all the entries of the provided ledger. 12 | """ 13 | entries, _, _ = loader.load_file(journal) 14 | return entries 15 | 16 | def transaction_hashes(self, journal): 17 | """ 18 | Load in-memory all the hashes (md5 property) of the provided ledger. 19 | This is required for the duplication detecting algo 20 | """ 21 | 22 | md5s = [] 23 | entries = self.get_entries(journal) 24 | for entry in entries: 25 | if isinstance(entry, Transaction): 26 | md5 = entry.meta.get("md5", "") 27 | if md5: 28 | md5s.append(md5) 29 | return md5s 30 | 31 | def get_accounts(self, journal): 32 | 33 | return get_accounts(self.get_entries(journal)) 34 | 35 | def get_transactions_by_account_name(self, journal, account): 36 | """ 37 | Get all transactions for a given account name. 38 | """ 39 | entries = self.get_entries(journal) 40 | txs = [] 41 | for entry in entries: 42 | if isinstance(entry, Transaction): 43 | if str(entry.meta["filename"]).endswith(f"{account}.ldg"): 44 | txs.append(entry) 45 | return txs 46 | -------------------------------------------------------------------------------- /beanborg/utils/string_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re 3 | 4 | 5 | class StringUtils: 6 | 7 | def strip_digits(str): 8 | # return ''.join([c for c in str if not c.isdigit()]) 9 | return re.sub("[^A-Z ]", "", str) 10 | -------------------------------------------------------------------------------- /bin/bb_archive: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | from beanborg import bb_archive 6 | if __name__ == '__main__': 7 | sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 8 | sys.exit(bb_archive.main()) -------------------------------------------------------------------------------- /bin/bb_import: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | from beanborg import bb_import 6 | if __name__ == '__main__': 7 | sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 8 | sys.exit(bb_import.main()) 9 | -------------------------------------------------------------------------------- /bin/bb_mover: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | from beanborg import bb_mover 6 | if __name__ == '__main__': 7 | sys.argv[0] = 
re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) 8 | sys.exit(bb_mover.main()) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beancount>=2.3.5, <3.0.0 2 | pyyaml==6.0.1 3 | rich==13.6.0 4 | prompt-toolkit==3.0.39 5 | pandas==1.5.2 6 | PyYAML==6.0.1 7 | textblob==0.17.1 8 | openai>=1.14.1 9 | numpy==1.26.3 10 | imblearn==0.0 11 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | PYTHONPATH=. pytest -s 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | # Read requirements.txt 4 | with open('requirements.txt') as f: 5 | required = f.read().splitlines() 6 | 7 | setup( 8 | name='beanborg', 9 | version='0.1', 10 | author='Luciano Fiandesio', 11 | author_email='luciano@fiandes.io', 12 | url='https://github.com/luciano-fiandesio/beanborg', 13 | packages=find_packages(), 14 | install_requires=required, 15 | include_package_data=True, 16 | scripts=['bin/bb_import', 'bin/bb_mover', 'bin/bb_archive'] 17 | ) -------------------------------------------------------------------------------- /tests/files/1234.ldg: -------------------------------------------------------------------------------- 1 | 2020-02-13 * "Dummy Supermarket" "" 2 | Assets:MyBank:Savings -105.12 EUR 3 | Expenses:Groceries 4 | 5 | -------------------------------------------------------------------------------- /tests/files/My_Custom_Rule.py: -------------------------------------------------------------------------------- 1 | from beanborg.rule_engine.rules import * 2 | 3 | class My_Custom_Rule(Rule): 4 | def __init__(self, name, context): 5 | 6 | # invoking the __init__ of the parent class 7 | Rule.__init__(self, name, context) 8 | 9 | def execute(self, csv_line, tx = None, ruleDef = None ): 10 | 11 | self.checkAccountFromTo(ruleDef) 12 | 13 | if "Withdrawal".lower() in csv_line[self.context.tx_type_pos].lower(): 14 | cashPosting = [Posting( 15 | account=ruleDef.get("from"), 16 | units=None, 17 | cost=None, 18 | price=None, 19 | flag=None, 20 | meta=None), 21 | Posting( 22 | account=ruleDef.get("to"), 23 | units=None, 24 | cost=None, 25 | price=None, 26 | flag=None, 27 | meta=None)] 28 | return (True, tx._replace(postings=cashPosting)) 29 | 30 | return (False,tx) 31 | -------------------------------------------------------------------------------- /tests/files/_1234.ldg: -------------------------------------------------------------------------------- 1 | 2020-02-13 * "Dummy Supermarket changed" "" 2 | Assets:MyBank:Savings -105.12 EUR 3 | Expenses:Groceries 4 | -------------------------------------------------------------------------------- /tests/files/account.rules: -------------------------------------------------------------------------------- 1 | value;expression;result 2 | freshfood;sw;Expenses:Groceries -------------------------------------------------------------------------------- /tests/files/amount_handler.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | archive_path: archive2 7 | date_format: "%d/%m/%Y" 8 | 9 | 
indexes: 10 | date: 8 11 | counterparty: 9 12 | amount: 10 13 | account: 11 14 | currency: 12 15 | tx_type: 13 16 | -------------------------------------------------------------------------------- /tests/files/asset.rules: -------------------------------------------------------------------------------- 1 | value;expression;result 2 | ZZ03100400000608903100;eq;Assets:Bob:Savings 3 | -------------------------------------------------------------------------------- /tests/files/bank1.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: hello_rule 32 | test: 1 -------------------------------------------------------------------------------- /tests/files/bank1_custom_rule.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: My_Custom_Rule 32 | from: Assets:UK:Alice:Savings 33 | to: Assets:UK:Alice:Cash -------------------------------------------------------------------------------- /tests/files/bank1_ignore_at_pos.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Ignore_By_StringAtPos 32 | ignore_string_at_pos: 33 | - waiting;4 34 | -------------------------------------------------------------------------------- /tests/files/bank1_ignore_by_counterparty.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | 
account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Ignore_By_Payee 32 | ignore_payee: 33 | - alfa 34 | - beta 35 | -------------------------------------------------------------------------------- /tests/files/bank1_ignore_contains_string_at_pos.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Ignore_By_ContainsStringAtPos 32 | ignore_string_contains_at_pos: 33 | - waiting;4 34 | -------------------------------------------------------------------------------- /tests/files/bank1_replace_asset.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Replace_Asset 32 | -------------------------------------------------------------------------------- /tests/files/bank1_replace_counterparty.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Replace_Payee 32 | -------------------------------------------------------------------------------- /tests/files/bank1_replace_expense.yaml: -------------------------------------------------------------------------------- 1 | --- !Config 2 | csv: 3 | download_path: "/Users/luciano/Desktop" 4 | name: bbk_statement 5 | bank_ref: bbk 6 | target: tmp2 7 | archive_path: archive2 8 | separator: '|' 9 | date_format: "%d/%m/%Y" 10 | currency_sep: "," 11 | skip: 3 12 | 13 | indexes: 14 | date: 8 15 | counterparty: 9 16 | amount: 10 17 | account: 11 18 | currency: 12 
19 | tx_type: 13 20 | amount_in: 14 21 | 22 | rules: 23 | beancount_file: 'main1.ldg' 24 | #rules_file: luciano.amex.rules 25 | account: '1234567' 26 | currency: GBP 27 | default_expense: 'Expense:Magic' 28 | force_negative: true 29 | invert_negative: true 30 | ruleset: 31 | - name: Replace_Expense 32 | -------------------------------------------------------------------------------- /tests/files/payee.rules: -------------------------------------------------------------------------------- 1 | value;expression;result 2 | ford;contains;Ford Auto -------------------------------------------------------------------------------- /tests/files/payee_with_comments.rules: -------------------------------------------------------------------------------- 1 | value;expression;result 2 | # this is a comment 3 | ford;contains;Ford Auto -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | from beanborg.rule_engine.rules_engine import RuleEngine 2 | from beanborg.rule_engine.Context import * 3 | from beanborg.rule_engine.decision_tables import * 4 | from beanborg.config import * 5 | 6 | def test_config1(): 7 | 8 | config = init_config('tests/files/bank1.yaml', False) 9 | 10 | assert config.csv.download_path == "/Users/luciano/Desktop" 11 | assert config.csv.name == "bbk_statement" 12 | assert config.csv.ref == "bbk" 13 | assert config.csv.target == 'tmp2' 14 | assert config.csv.archive == 'archive2' 15 | assert config.csv.separator == '|' 16 | assert config.csv.date_format == '%d/%m/%Y' 17 | assert config.csv.skip == 3 18 | 19 | assert config.indexes.date == 8 20 | assert config.indexes.counterparty == 9 21 | assert config.indexes.amount == 10 22 | assert config.indexes.account == 11 23 | assert config.indexes.currency == 12 24 | assert config.indexes.tx_type == 13 25 | assert config.indexes.amount_in == 14 26 | 27 | assert config.rules.bc_file == 'main1.ldg' 28 | assert config.rules.account == '1234567' 29 | assert config.rules.currency == 'GBP' 30 | assert config.rules.default_expense == "Expense:Magic" 31 | assert config.rules.force_negative == True 32 | assert config.rules.invert_negative == True 33 | 34 | assert len(config.rules.ruleset) == 1 35 | assert config.rules.ruleset[0]['name'] == 'hello_rule' 36 | assert config.rules.ruleset[0]['test'] == 1 37 | 38 | -------------------------------------------------------------------------------- /tests/test_currency_handler.py: -------------------------------------------------------------------------------- 1 | from beanborg.handlers.amount_handler import AmountHandler 2 | from beanborg.config import * 3 | from beancount.core.number import D 4 | 5 | def test_handler(): 6 | 7 | config = init_config('tests/files/amount_handler.yaml', False) 8 | 9 | handler = AmountHandler() 10 | 11 | assert D("100.00") == handler.handle("100.00", config) 12 | assert D("22000.76") == handler.handle("22 000,76", config) 13 | assert D("22000.76") == handler.handle("22.000,76", config) 14 | assert D("1022000.76") == handler.handle("1022000,76", config) 15 | assert D("-1022000.76") == handler.handle("-1,022,000.76", config) 16 | assert D("1022000.00") == handler.handle("1022000", config) 17 | assert D("22000.76") == handler.handle("22 000,76$", config) 18 | 19 | -------------------------------------------------------------------------------- /tests/test_decision_tables.py: 
-------------------------------------------------------------------------------- 1 | from beanborg.rule_engine.decision_tables import * 2 | 3 | 4 | def test_equal_value(): 5 | table = {} 6 | table["superman"] = ("equals", "batman") 7 | assert "batman" == resolve_from_decision_table(table, "superman", "mini") 8 | 9 | def test_equal_value_different_case(): 10 | table = {} 11 | table["superman"] = ("equals", "batman") 12 | assert "Batman" != resolve_from_decision_table(table, "superman", "mini") 13 | 14 | def test_equal_value_ignore_different_case(): 15 | table = {} 16 | table["rewe"] = ("equals_ic", "Expenses:Groceries") 17 | assert "Expenses:Groceries" == resolve_from_decision_table(table, "rewe", "Expenses:Unknown") 18 | 19 | def test_startsWith_value(): 20 | table = {} 21 | table["superman"] = ("startsWith", "batman") 22 | assert "batman" == resolve_from_decision_table(table, "superman_is_cool", "mini") 23 | 24 | def test_endsWith_value(): 25 | table = {} 26 | table["superman"] = ("endsWith", "batman") 27 | assert "batman" == resolve_from_decision_table(table, "hello_superman", "mini") 28 | 29 | def test_contains_value(): 30 | table = {} 31 | table["superman"] = ("contains", "batman") 32 | assert "batman" == resolve_from_decision_table( 33 | table, "hello_superman_hello", "mini" 34 | ) 35 | 36 | def test_contains_value_ignore_case(): 37 | table = {} 38 | table["rewe"] = ("contains_ic", "Expenses:Groceries") 39 | 40 | assert "Expenses:Groceries" == resolve_from_decision_table( 41 | table, "card transaction - supermarket REWE", "Expenses:Unknown" 42 | ) 43 | 44 | def test_loadfile(): 45 | table = init_decision_table("tests/files/payee_with_comments.rules") 46 | assert table["ford"] != None 47 | assert table["ford"][0] == "contains" 48 | assert table["ford"][1] == "Ford Auto" 49 | -------------------------------------------------------------------------------- /tests/test_duplicate_detector.py: -------------------------------------------------------------------------------- 1 | from beanborg.utils.duplicate_detector import * 2 | from beancount import loader 3 | 4 | def test_duplication(): 5 | 6 | # load dummy ledger file 7 | txs = init_duplication_store('1234.ldg', 'tests/files/1234.ldg' ) 8 | 9 | # load a second dummy ledger file, that contains an identical transaction 10 | entries, _, _ = loader.load_file('tests/files/_1234.ldg') 11 | for entry in entries: 12 | tup = to_tuple(entry) 13 | assert (hash_tuple(tup) in txs) 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /tests/test_rules_engine.py: -------------------------------------------------------------------------------- 1 | from beanborg.rule_engine.rules_engine import RuleEngine 2 | from beanborg.rule_engine.Context import * 3 | from beanborg.rule_engine.decision_tables import * 4 | from beanborg.config import * 5 | 6 | def test_payee_replacement(): 7 | 8 | rule_engine = make_rule_engine('tests/files/bank1_replace_counterparty.yaml') 9 | 10 | entries = "31.10.2019,b,auszahlung,electro ford,x,ZZ03100400000608903100".split(",") 11 | tx = rule_engine.execute(entries) 12 | assert tx.payee == "Ford Auto" 13 | 14 | def test_asset_replacement(): 15 | 16 | rule_engine = make_rule_engine('tests/files/bank1_replace_asset.yaml') 17 | entries = "31.10.2019,b,auszahlung,electro ford,x,ZZ03100400000608903100".split(",") 18 | tx = rule_engine.execute(entries) 19 | assert tx.postings[0].account == "Assets:Bob:Savings" 20 | 21 | def test_expense_replacement(): 22 | 23 | rule_engine = 
make_rule_engine('tests/files/bank1_replace_expense.yaml') 24 | entries = "31.10.2019,b,auszahlung,freshfood Bonn,x,ZZ03100400000608903100".split( 25 | "," 26 | ) 27 | tx = rule_engine.execute(entries) 28 | assert tx.postings[1].account == "Expenses:Groceries" 29 | 30 | def test_ignore(): 31 | 32 | rule_engine = make_rule_engine('tests/files/bank1_ignore_by_counterparty.yaml') 33 | 34 | entries = "31.10.2019,b,auszahlung,alfa,x,ZZ03100400000608903100".split(",") 35 | tx = rule_engine.execute(entries) 36 | assert tx == None 37 | 38 | entries = "31.10.2019,b,auszahlung,beta,x,ZZ03100400000608903100".split(",") 39 | tx = rule_engine.execute(entries) 40 | assert tx == None 41 | 42 | def test_ignore_at_position(): 43 | 44 | rule_engine = make_rule_engine('tests/files/bank1_ignore_at_pos.yaml') 45 | entries = "31.10.2019,b,auszahlung,alfa,waiting,ZZ03100400000608903100".split(",") 46 | tx = rule_engine.execute(entries) 47 | assert tx == None 48 | 49 | def test_ignore_by_contains_string_at_position(): 50 | rule_engine = make_rule_engine('tests/files/bank1_ignore_contains_string_at_pos.yaml') 51 | entries = "31.10.2019,b,auszahlung,alfa,this is waiting alfa,ZZ03100400000608903100".split(",") 52 | tx = rule_engine.execute(entries) 53 | assert tx == None 54 | 55 | 56 | def test_custom_rule(): 57 | 58 | rule_engine = make_rule_engine('tests/files/bank1_custom_rule.yaml') 59 | entries = "31.10.2019,b,Withdrawal,alfa,waiting,ZZ03100400000608903100".split(",") 60 | tx = rule_engine.execute(entries) 61 | 62 | assert tx.postings[0].account == "Assets:UK:Alice:Savings" 63 | assert tx.postings[1].account == "Assets:UK:Alice:Cash" 64 | 65 | def test_no_rulefile(): 66 | 67 | rule_engine = RuleEngine( 68 | Context( 69 | rules_dir=None, 70 | account=None, 71 | date_fomat="%d.%m.%Y", 72 | default_expense="Expenses:Unknown", 73 | date_pos=0, 74 | payee_pos=3, 75 | tx_type_pos=2, 76 | narration_pos=-1, 77 | account_pos=5, 78 | ruleset=None, 79 | force_account=None, 80 | debug=False 81 | ) 82 | ) 83 | 84 | entries = "31.10.2019,b,Withdrawal,alfa,waiting,ZZ03100400000608903100".split(",") 85 | tx = rule_engine.execute(entries) 86 | 87 | # no exception - the transaction is empty 88 | assert tx 89 | 90 | def make_rule_engine(config_file): 91 | config = init_config(config_file, False) 92 | 93 | return RuleEngine( 94 | Context( 95 | ruleset=config.rules.ruleset, 96 | rules_dir="tests/files", 97 | account=None, 98 | date_fomat="%d.%m.%Y", 99 | default_expense="Expenses:Unknown", 100 | date_pos=0, 101 | payee_pos=3, 102 | tx_type_pos=2, 103 | narration_pos=-1, 104 | account_pos=5, 105 | force_account=None, 106 | debug=False 107 | ) 108 | ) 109 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{36,37,38,39} 3 | 4 | # Activate isolated build environment. tox will use a virtual environment 5 | # to build a source distribution from the source tree. For build tools and 6 | # arguments use the pyproject.toml file as specified in PEP-517 and PEP-518. 7 | isolated_build = true 8 | 9 | [testenv] 10 | deps = 11 | # If your project uses README.rst, uncomment the following: 12 | # readme_renderer 13 | flake8 14 | pytest 15 | commands = 16 | # This repository uses a Markdown long_description, so the -r flag to 17 | # `setup.py check` is not needed. If your project contains a README.rst, 18 | # use `python setup.py check -m -r -s` instead. 19 | #python setup.py check -m -s 20 | # flake8 . 
21 | py.test tests {posargs} 22 | 23 | [flake8] 24 | exclude = .tox,*.egg,build,data 25 | #select = E,W,F 26 | extend-ignore = C408,B006,DUO130 27 | max-line-length = 120 -------------------------------------------------------------------------------- /tutorial/README.md: -------------------------------------------------------------------------------- 1 | # Beanborg tutorial 2 | 3 | This tutorial guides the user through the steps required to import financial transactions from a fictional bank (Eagle Bank) into an existing Beancount ledger, using [Beanborg](https://github.com/luciano-fiandesio/beanborg). 4 | 5 | ## Initial setup 6 | 7 | The tutorial is based on an existing Beancount setup, structured like [so](https://github.com/luciano-fiandesio/beanborg/tree/master/tutorial): 8 | 9 | ``` 10 | accounts.ldg 11 | main.ldg 12 | | 13 | |__ UK0000001444555.ldg 14 | ``` 15 | 16 | To get started, install beanborg using `pip`: 17 | 18 | ``` 19 | pip install git+https://github.com/luciano-fiandesio/beanborg.git 20 | ``` 21 | 22 | To get an idea of the Beanborg workflow and learn about the different configuration options, you might want to take a quick look at the project's [README](https://github.com/luciano-fiandesio/beanborg/blob/master/README.md). 23 | 24 | The goal of this tutorial is to be able to import the transactions from the [sample CSV](https://github.com/luciano-fiandesio/beanborg/blob/master/tutorial/test-data/eagle-bank-statement.csv) file into the ledger-managed `UK0000001444555.ldg` file. 25 | 26 | ## Creating a configuration file for Eagle Bank 27 | 28 | Beanborg requires a configuration file for each type of CSV file that we wish to import. 29 | Normally, each CSV file is bound to a financial institution, so it's good practice to name our config file after the bank. In this case, `eagle.yaml`. 30 | 31 | Let's create a new folder where we will store the import configuration. 32 | 33 | ``` 34 | mkdir config 35 | ``` 36 | 37 | Create a new `eagle.yaml` file in the `config` folder. 38 | 39 | Now, let's open the fictional CSV file, located in the `test-data` folder. It is important to understand the structure of the CSV file in order to configure Beanborg properly. 40 | 41 | ``` 42 | OPEN BOOK;VALUE DATE;TX TYPE;BOOKING TEXT;AMOUNT;CURRENCY;ACCOUNT;IBAN 43 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food";-21,30;EUR;0000001;UK0000001444555 44 | 04.11.2020;03.11.2020;Credit;"Best Company";1000,00;EUR;0000001;UK0000001444555 45 | 01.11.2020;01.11.2020;Direct Debit;"Doctor Bill";-540,10;EUR;0000001;UK0000001444555 46 | 01.12.2020;01.11.2020;Cash Withdrawal;Bank Of Mars;-100;EUR;0000001;UK0000001444555 47 | ``` 48 | 49 | By observing the CSV file, we can determine the following information: 50 | 51 | - we want to skip the first line 52 | - the field delimiter is the `;` character rather than the more standard comma 53 | - the currency separator is the `,` character rather than the more standard `.` 54 | - the date format uses day, month and year, separated by a dot 55 | - in order to match the ledger file `UK0000001444555.ldg` to this account we can use the `IBAN` field of the CSV file 56 | 57 | Let's start creating the configuration file for Eagle Bank. Paste the following snippet in the previously created file, `eagle.yaml`: 58 | 59 | 60 | ``` 61 | --- !Config 62 | csv: 63 | download_path: !CHANGE ME!
64 | name: eagle 65 | bank_ref: eag 66 | separator: ';' 67 | date_format: "%d.%m.%Y" 68 | currency_sep: "," 69 | ``` 70 | 71 | During a normal import operation, the CSV file is downloaded from the bank app (mobile or web) and placed in a download folder. For the sake of the tutorial, you can copy the file `eagle-bank-statement.csv` to your Downloads folder and replace `!CHANGE ME!` with the path to that folder, e.g. `/Users/tom/Downloads`. 72 | 73 | Let's look at this initial configuration. 74 | The `name` property is required to find the CSV file in the path specified by the `download_path` property. It is enough to specify the first few letters of the CSV file name, without the `csv` extension. 75 | 76 | The `bank_ref` property is very important, because it is used by Beanborg to rename the CSV file and move it to the staging area. If one has multiple bank accounts to import, it is crucial that the value of `bank_ref` is unique. 77 | 78 | The `date_format`, `separator` and `currency_sep` properties should be self-explanatory. 79 | 80 | We don't need to specify the `skip` property, since the default value is `1`. 81 | 82 | Let's try to import the CSV file into the working area, using `bb_mover`. 83 | 84 | ``` 85 | bb_mover -f config/eagle.yaml 86 | ``` 87 | 88 | If the file is found, the script should return: 89 | 90 | ``` 91 | Done :) 92 | ``` 93 | 94 | ## Add the mapping information and rules to the configuration file 95 | 96 | In order to successfully import the transactions from Eagle Bank into our ledger, we need to supply some more information to Beanborg: CSV mapping info and rules. 97 | 98 | Append the following configuration to the `eagle.yaml` file: 99 | 100 | ``` 101 | indexes: 102 | date: 1 103 | tx_type: 2 104 | counterparty: 3 105 | amount: 4 106 | currency: 5 107 | account: 7 108 | ``` 109 | 110 | Note that the `indexes` block sits at the root of the yaml file (same "level" as `csv`). 111 | 112 | 113 | With this block of configuration we are instructing Beanborg about the position of the relevant data within our CSV file. 114 | This image should hopefully make the concept clearer: 115 | 116 | ![Alt text](assets/csv.png) 117 | 118 | Beanborg is now able to map the most relevant information of the CSV file to the Beancount structure and create a valid transaction. 119 | 120 | The last section of the configuration relates to **rules**. 121 | 122 | Rules can be considered as a list of "actions" that are executed one after the other and are applied to each row of the CSV file we want to import. 123 | 124 | There are different types of rules: some can be used to change the accounts of a transaction, others to ignore a specific transaction. 125 | 126 | Let's focus on a simple rule that will assign the correct Expense account to each transaction in our CSV file. 127 | 128 | 129 | Append the following configuration to the `eagle.yaml` file: 130 | 131 | ``` 132 | rules: 133 | ruleset: 134 | - name: Replace_Expense 135 | ``` 136 | 137 | Before importing the CSV data, we need one last step: a configuration file (named `asset.rules`) that helps Beanborg associate the bank account asset definition (`Assets:Bank1:Bob:Current`, the bank account used in this tutorial) to the bank account identifier in the CSV file (in this tutorial, the IBAN number). 138 | Note that this file is required by the `Replace_Asset` rule, which is automatically executed, even if it's not specified in the rules list.
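Under the hood, these `.rules` files are simple look-up tables in the `value;expression;result` format. The snippet below is a minimal sketch of how such a table is resolved, based on the helpers exposed by `beanborg.rule_engine.decision_tables`; the file path and the fallback value passed as third argument are only illustrative assumptions:

```
from beanborg.rule_engine.decision_tables import (
    init_decision_table,
    resolve_from_decision_table,
)

# parse the "value;expression;result" rows into a look-up table
table = init_decision_table("rules/asset.rules")

# if "UK0000001444555" matches a row, the mapped result is returned,
# otherwise the fallback value (third argument) is used
print(resolve_from_decision_table(table, "UK0000001444555", "Expenses:Unknown"))
```

You normally never call these functions yourself; rules such as `Replace_Asset` and `Replace_Expense` perform the look-up for you during the import.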
139 | 140 | In Beanborg, all configuration files are placed in the `rules` folder; note that the folder name can be changed using the `rules_folder` property of the `rules` configuration. 141 | 142 | ``` 143 | mkdir rules 144 | cd rules 145 | touch asset.rules 146 | ``` 147 | 148 | Copy the following content into `asset.rules`: 149 | 150 | ``` 151 | value;expression;result 152 | UK0000001444555;equals;Assets:Bank1:Bob:Current 153 | ``` 154 | 155 | 156 | It's now time to run the second Beanborg script, `bb_import`, which imports the transactions into the ledger. 157 | 158 | ``` 159 | bb_import -f config/eagle.yaml 160 | ``` 161 | 162 | The script should exit immediately with the following error: 163 | 164 | ``` 165 | file: rules/account.rules does not exist! - The 'Replace_Expense' rules requires the account.rules file. 166 | ``` 167 | 168 | The `Replace_Expense` rule requires an additional look-up table file to map counterparty names to Expense categories. 169 | This file (named `account.rules`) should be located in the `rules` folder. 170 | 171 | 172 | Create a new `account.rules` file in the `rules` folder and paste the following data: 173 | 174 | ``` 175 | value;expression;result 176 | Fresh Food;contains;Expenses:Groceries 177 | Best Company;contains;Expenses:Clothing 178 | Doctor Bill;eq;Expenses:Medical 179 | ``` 180 | 181 | Run `bb_import -f config/eagle.yaml` again and, this time, the import should be successful. 182 | 183 | ``` 184 | summary: 185 | 186 | csv tx count: 4 187 | imported: 4 188 | tx already present: 0 189 | ignored by rule: 0 190 | error: 0 191 | ``` 192 | 193 | Each row in the CSV file is matched against the `account.rules` file: if the `counterparty` index matches the first part of the expression (e.g. `Fresh Food`), the second leg of the transaction is replaced with the proper Expenses category, in this case `Expenses:Groceries`. 194 | 195 | The `UK0000001444555.ldg` file should now contain the 4 transactions from the CSV file, and both "sides" of each transaction should be correctly set, except for one: the cash withdrawal from Bank Of Mars. We will see how to correctly categorize this transaction as well. 196 | 197 | Running the same script again (`bb_import -f config/eagle.yaml`) will trigger the automatic duplication detection mechanism: 198 | 199 | ``` 200 | summary: 201 | 202 | csv tx count: 4 203 | imported: 0 204 | tx already present: 4 205 | ignored by rule: 0 206 | error: 0 207 | ``` 208 | 209 | Note that the value of `tx already present` is `4` and `imported` is set to `0`. 210 | 211 | At this time, Beanborg does not support executing the rules without importing the data. In order to show how to import the cash withdrawal entry from our sample bank file, we need to delete and recreate the sample ledger file: 212 | 213 | ``` 214 | rm UK0000001444555.ldg 215 | touch UK0000001444555.ldg 216 | ``` 217 | 218 | Let's take a look at the cash withdrawal entry from the CSV file: 219 | 220 | ``` 221 | 01.12.2020;01.11.2020;Cash Withdrawal;Bank Of Mars;-100;EUR;0000001;UK0000001444555 222 | ``` 223 | 224 | We want to create a transaction that has the origin account set to our bank and the destination account set to `Assets:Cash:Bob`. 225 | We could create a new rule in the `account.rules` file: 226 | 227 | ``` 228 | Bank Of Mars;contains;Assets:Cash:Bob 229 | ``` 230 | 231 | but this is probably not such a good idea, because we may have multiple types of transactions from `Bank Of Mars`, for instance bank fees.
232 | Since the CSV entry clearly specifies `Cash Withdrawal` as transaction type, we can simply add a new `Set_Accounts` rule that makes use of the transaction type to assign the accounts to the transaction; add the following rule definition to the `ruleset` in `eagle.yaml`: 233 | 234 | ``` 235 | - name: Set_Accounts 236 | from: Assets:Bank1:Bob:Current 237 | to: Assets:Cash:Bob 238 | csv_index: 2 239 | csv_values: Cash Withdrawal 240 | ``` 241 | 242 | Let's re-run the import script (`bb_import -f config/eagle.yaml`): this time all four transactions should be properly categorized. 243 | The `Set_Accounts` rule uses `csv_index` to determine which index of the CSV to analyze (remember, the index count starts from `0`), while `csv_values` determines the string that should match the value at that index. If a match is found, both the `from` and `to` accounts are set on the transaction. 244 | 245 | ## Archive the CSV bank file 246 | 247 | Once the CSV file is imported, we need to archive it. Note that this step is mandatory: if you do not want to keep an archive of the CSV file, it is important to at least clean the "staging" folder of the working file (`rm tmp/*.*`, assuming the default value of the `csv.target` property is used). 248 | 249 | The archiving script simply moves the CSV file from the staging directory (`tmp`) to an `archive` directory. Additionally, it renames the CSV file by appending the dates of the first and last transactions to its name. 250 | Let's take as an example the CSV file from Eagle Bank. When the file is imported into the staging area, it gets renamed to `eag.csv`. 251 | The archive script analyzes the CSV file, extracts the dates of the first and last transactions, and renames the file to `eag_2020-11-01_2020-11-04` before moving it to the `archive` folder. 252 | Let's try: 253 | 254 | ``` 255 | bb_archive -f config/eagle.yaml 256 | ``` 257 | 258 | The output should look like: 259 | 260 | ``` 261 | ✓ detecting start and end date of transaction file... 262 | ✓ moving file to archive... 263 | ✓ removing temp folder 264 | ``` 265 | 266 | Note that `bb_archive` has also removed the staging folder `tmp`.
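To recap, once the configuration is in place, a full import session for Eagle Bank boils down to the three commands used throughout this tutorial:

```
# move the downloaded CSV file into the staging area
bb_mover -f config/eagle.yaml

# apply the rules and import the transactions into the ledger
bb_import -f config/eagle.yaml

# archive the processed CSV file and remove the staging folder
bb_archive -f config/eagle.yaml
```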
267 | 268 | 269 | 270 | -------------------------------------------------------------------------------- /tutorial/UK0000001444555.ldg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tutorial/accounts.ldg: -------------------------------------------------------------------------------- 1 | ; account opening declaration 2 | 2000-01-01 open Equity:Opening-Balances 3 | 4 | ; *** ASSETS *** 5 | ; Bob 6 | 2000-01-01 open Assets:Bank1:Bob:Current EUR 7 | 2000-01-01 open Assets:Cash:Bob EUR 8 | 9 | 10 | 11 | 12 | ; *** EXPENSES *** 13 | 2000-01-01 open Expenses:Unknown 14 | ; monthly expenses 15 | 2000-01-01 open Expenses:Groceries 16 | 2000-01-01 open Expenses:Medical 17 | 2000-01-01 open Expenses:EatingOut 18 | 2000-01-01 open Expenses:Clothing 19 | 2000-01-01 open Expenses:Utilities:Electricity 20 | 2000-01-01 open Expenses:Utilities:Gas 21 | 2000-01-01 open Expenses:Utilities:Internet 22 | 23 | ; *** INCOME *** 24 | 2000-01-01 open Income:Salary:Company1 25 | 2000-01-01 open Income:Salary:Company2 26 | 2000-01-01 open Income:Sale:Ebay 27 | 28 | 29 | ; *** LIABILITIES *** 30 | 2000-01-01 open Liabilities:Master:Alice 31 | -------------------------------------------------------------------------------- /tutorial/assets/csv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciano-fiandesio/beanborg/a2e3ddf5dfea1f23cf51e5bdaf930d5495616469/tutorial/assets/csv.png -------------------------------------------------------------------------------- /tutorial/main.ldg: -------------------------------------------------------------------------------- 1 | option "title" "Shared Ledger" 2 | option "operating_currency" "EUR" 3 | 4 | include "accounts.ldg" 5 | ;include "budget.ldg" 6 | 7 | include "UK0000001444555.ldg" 8 | ;include "BANK200000.ldg" 9 | -------------------------------------------------------------------------------- /tutorial/test-data/eagle-bank-statement.csv: -------------------------------------------------------------------------------- 1 | OPEN BOOK;VALUE DATE;TX TYPE;BOOKING TEXT;AMOUNT;CURRENCY;ACCOUNT;IBAN 2 | 04.11.2020;04.11.2020;Direct Debit;"Fresh Food";-21,30;EUR;0000001;UK0000001444555 3 | 04.11.2020;03.11.2020;Credit;"Best Company";1000,00;EUR;0000001;UK0000001444555 4 | 01.11.2020;01.11.2020;Direct Debit;"Doctor Bill";-540,10;EUR;0000001;UK0000001444555 5 | 01.12.2020;01.11.2020;Cash Withdrawal;Bank Of Mars;-100;EUR;0000001;UK0000001444555 6 | --------------------------------------------------------------------------------