├── tests
    ├── __init__.py
    ├── grammar
    │   ├── __init__.py
    │   ├── directives
    │   │   ├── __init__.py
    │   │   ├── test_option.py
    │   │   ├── test_note.py
    │   │   ├── test_include.py
    │   │   ├── test_plugin.py
    │   │   ├── test_document.py
    │   │   ├── test_event.py
    │   │   ├── test_price.py
    │   │   ├── test_close.py
    │   │   ├── test_commodity.py
    │   │   ├── test_pad.py
    │   │   ├── test_balance.py
    │   │   ├── test_open.py
    │   │   ├── test_transaction.py
    │   │   └── test_posting.py
    │   ├── terminals
    │   │   ├── __init__.py
    │   │   ├── test_comment.py
    │   │   ├── test_section_header.py
    │   │   ├── test_tag.py
    │   │   ├── test_link.py
    │   │   ├── test_date.py
    │   │   ├── test_currency.py
    │   │   ├── test_account.py
    │   │   ├── test_amount.py
    │   │   └── test_numbers.py
    │   ├── test_cost.py
    │   ├── conftest.py
    │   ├── test_metadata.py
    │   └── test_beancount.py
    ├── fixtures
    │   ├── traverse
    │   │   ├── circular
    │   │   │   ├── other.bean
    │   │   │   ├── file0.bean
    │   │   │   ├── file1.bean
    │   │   │   └── main.bean
    │   │   ├── glob
    │   │   │   ├── a
    │   │   │   │   ├── b
    │   │   │   │   │   ├── file1.bean
    │   │   │   │   │   └── file2.bean
    │   │   │   │   └── file0.bean
    │   │   │   └── main.bean
    │   │   ├── nested
    │   │   │   ├── other.bean
    │   │   │   ├── a
    │   │   │   │   ├── b
    │   │   │   │   │   └── file1.bean
    │   │   │   │   └── file0.bean
    │   │   │   └── main.bean
    │   │   ├── secret
    │   │   │   └── main.bean
    │   │   └── contain_root_dir
    │   │   │   └── main.bean
    │   ├── includes.bean
    │   └── simple.bean
    ├── conftest.py
    ├── test_helpers.py
    └── test_parser.py
├── beancount_parser
    ├── __init__.py
    ├── grammar
    │   ├── comment.lark
    │   ├── boolean.lark
    │   ├── section_header.lark
    │   ├── currency.lark
    │   ├── numbers.lark
    │   ├── flag.lark
    │   ├── link.lark
    │   ├── tag.lark
    │   ├── account.lark
    │   ├── escaped_string.lark
    │   ├── date.lark
    │   └── beancount.lark
    ├── data_types.py
    ├── helpers.py
    └── parser.py
├── .pre-commit-config.yaml
├── pyproject.toml
├── LICENSE
├── .circleci
    └── config.yml
├── .gitignore
├── README.md
└── poetry.lock


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/beancount_parser/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/grammar/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/grammar/terminals/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/circular/other.bean:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/glob/a/b/file1.bean:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/glob/a/b/file2.bean:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/nested/other.bean:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/secret/main.bean:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/nested/a/b/file1.bean:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/comment.lark:
--------------------------------------------------------------------------------
1 | COMMENT: ";" /[^\n]*/
2 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/boolean.lark:
--------------------------------------------------------------------------------
1 | BOOLEAN: "TRUE" | "FALSE"
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/circular/file0.bean:
--------------------------------------------------------------------------------
1 | include "file1.bean"
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/circular/file1.bean:
--------------------------------------------------------------------------------
1 | include "main.bean"
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/circular/main.bean:
--------------------------------------------------------------------------------
1 | include "file0.bean"
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/glob/a/file0.bean:
--------------------------------------------------------------------------------
1 | include "b/*.bean"
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/glob/main.bean:
--------------------------------------------------------------------------------
1 | include "*/file0.bean"
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/nested/main.bean:
--------------------------------------------------------------------------------
1 | include "a/file0.bean"
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/nested/a/file0.bean:
--------------------------------------------------------------------------------
1 | include "b/file1.bean"
2 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/section_header.lark:
--------------------------------------------------------------------------------
1 | SECTION_HEADER: "*" /[^\n]*/
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/traverse/contain_root_dir/main.bean:
--------------------------------------------------------------------------------
1 | include "../secret/main.bean"
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/includes.bean:
--------------------------------------------------------------------------------
1 | ; comment0
2 | include "foo.bean"
3 | 
4 | 2024-05-05 open Assets:Cash
5 | 
6 | ; comment1
7 | include "bar.bean"
8 | include "2024/*.bean"
9 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/currency.lark:
--------------------------------------------------------------------------------
1 | CURRENCY_CHARS: UCASE_LETTER | DIGIT | /['._-]/
2 | CURRENCY: UCASE_LETTER [CURRENCY_CHARS* (UCASE_LETTER | DIGIT)]
3 | 
4 | %import common.DIGIT
5 | %import common.UCASE_LETTER
6 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/numbers.lark:
--------------------------------------------------------------------------------
1 | COMMA_SEP_NUMBER: DIGIT~1..3 ("," DIGIT~3)+ ["." DIGIT+]
2 | NUMBER: INT | DECIMAL | COMMA_SEP_NUMBER
3 | 
4 | %import common.INT
5 | %import common.DIGIT
6 | %import common.DECIMAL
7 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/flag.lark:
--------------------------------------------------------------------------------
1 | // Source: https://github.com/beancount/beancount/blob/a6352005a466bf3377d7caf5b1570d3bd08207fe/beancount/parser/lexer.l#L135
2 | FLAG: "*" | "!" | "&" | "#" | "?" | "%" | "P" | "S" | "T" | "C" | "U" | "R" | "M"
3 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/link.lark:
--------------------------------------------------------------------------------
1 | // Source: https://github.com/beancount/beancount/blob/a6352005a466bf3377d7caf5b1570d3bd08207fe/beancount/parser/lexer.l#L243
2 | 
3 | LINK: "^" (LETTER | DIGIT | "-" | "_" | "." | "/")+
4 | 
5 | %import common.LETTER
6 | %import common.DIGIT
7 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | 
 3 | import pytest
 4 | 
 5 | TEST_PACKAGE_FOLDER = pathlib.Path(__file__).parent
 6 | FIXTURE_FOLDER = TEST_PACKAGE_FOLDER / "fixtures"
 7 | 
 8 | 
 9 | @pytest.fixture
10 | def fixtures_folder() -> pathlib.Path:
11 |     return FIXTURE_FOLDER
12 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/psf/black
 3 |     rev: 24.3.0
 4 |     hooks:
 5 |       - id: black
 6 |         language_version: python3.11
 7 |   - repo: https://github.com/asottile/reorder_python_imports
 8 |     rev: v3.12.0
 9 |     hooks:
10 |     -   id: reorder-python-imports
11 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/tag.lark:
--------------------------------------------------------------------------------
1 | // Source: https://github.com/beancount/beancount/blob/a6352005a466bf3377d7caf5b1570d3bd08207fe/beancount/parser/lexer.l#L238
2 | 
3 | TAG: "#" (LETTER | DIGIT | "-" | "_" | "." | "/")+
4 | TAGS: TAG [_WS_INLINE+ TAG]
5 | 
6 | %import common.LETTER
7 | %import common.DIGIT
8 | %import common.WS_INLINE -> _WS_INLINE
9 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/account.lark:
--------------------------------------------------------------------------------
1 | // See: https://github.com/beancount/beancount/blob/e1716b492c7619682a6d7c33c4873aa41954af1e/beancount/parser/lexer.l#L129-L130
2 | 
3 | NON_ASCII: /[^\x00-\x7f]/
4 | 
5 | ACCOUNT_TYPE: (/[A-Z]/ | NON_ASCII) (/[A-Za-z0-9\-]/ | NON_ASCII)*
6 | ACCOUNT_NAME: (/[A-Z0-9]/ | NON_ASCII) (/[A-Za-z0-9\-]/ | NON_ASCII)*
7 | ACCOUNT: ACCOUNT_TYPE (":" ACCOUNT_NAME)+
8 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/escaped_string.lark:
--------------------------------------------------------------------------------
1 | // The ESCAPED_STRING terminal from the official common.lark file doesn't support
2 | // multi-line strings, so the inner pattern is changed slightly to also match newlines
3 | // ref: https://github.com/lark-parser/lark/blob/8a77e42c83a034cf19e86f755013f8a432f36c79/lark/grammars/common.lark#L26-L29
4 | _STRING_INNER: /(.|\r?\n)*?/
5 | _STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/
6 | 
7 | ESCAPED_STRING : "\"" _STRING_ESC_INNER "\""
8 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/date.lark:
--------------------------------------------------------------------------------
 1 | // Source: https://github.com/beancount/beancount/blob/a6352005a466bf3377d7caf5b1570d3bd08207fe/beancount/parser/lexer.l#L212
 2 | 
 3 | FOUR_DIGIT: DIGIT DIGIT DIGIT DIGIT+
 4 | TWO_DIGIT: DIGIT+
 5 | YEAR: FOUR_DIGIT
 6 | MONTH: TWO_DIGIT
 7 | DAY: TWO_DIGIT
 8 | // DATE is given a higher priority because NUMBER or number_expr may also be in
 9 | // the list of candidates for a rule
10 | DATE.10: YEAR ("-" | "/") MONTH ("-" | "/") DAY
11 | 
12 | %import common.DIGIT
13 | 


--------------------------------------------------------------------------------
/tests/grammar/terminals/test_comment.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | 
 6 | 
 7 | @pytest.fixture
 8 | def comment_parser(make_parser: typing.Callable) -> Lark:
 9 |     return make_parser(module="comment", rule="COMMENT")
10 | 
11 | 
12 | @pytest.mark.parametrize(
13 |     "text",
14 |     [
15 |         ";",
16 |         ";;",
17 |         "; whatever",
18 |         ";; whatever",
19 |         ";     ",
20 |     ],
21 | )
22 | def test_parse_comment(comment_parser: Lark, text: str):
23 |     comment_parser.parse(text)
24 | 


--------------------------------------------------------------------------------
/tests/grammar/terminals/test_section_header.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | 
 6 | 
 7 | @pytest.fixture
 8 | def section_header_parser(make_parser: typing.Callable) -> Lark:
 9 |     return make_parser(module="section_header", rule="SECTION_HEADER")
10 | 
11 | 
12 | @pytest.mark.parametrize(
13 |     "text",
14 |     [
15 |         "*",
16 |         "**",
17 |         "* whatever",
18 |         "*whatever",
19 |         "*     ",
20 |     ],
21 | )
22 | def test_parse_section_header(section_header_parser: Lark, text: str):
23 |     section_header_parser.parse(text)
24 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "beancount-parser"
 3 | version = "1.2.3"
 4 | description = "Standalone Lark based Beancount syntax parser (not relying on Beancount library), MIT license"
 5 | authors = ["Fang-Pen Lin <fangpen@launchplatform.com>"]
 6 | license = "MIT"
 7 | repository = "https://github.com/LaunchPlatform/beancount-parser"
 8 | readme = "README.md"
 9 | 
10 | [tool.poetry.dependencies]
11 | python = "^3.9"
12 | lark = "^1.1.2"
13 | 
14 | [tool.poetry.dev-dependencies]
15 | pytest = "^7.1.1"
16 | 
17 | [build-system]
18 | requires = ["poetry-core>=1.0.0"]
19 | build-backend = "poetry.core.masonry.api"
20 | 


--------------------------------------------------------------------------------
/tests/test_helpers.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | 
 6 | from beancount_parser.helpers import collect_entries
 7 | from beancount_parser.parser import make_parser
 8 | 
 9 | 
10 | @pytest.fixture
11 | def parser() -> Lark:
12 |     return make_parser()
13 | 
14 | 
15 | def test_collect_entries(parser: Lark, fixtures_folder: pathlib.Path):
16 |     bean_file = fixtures_folder / "simple.bean"
17 |     tree = parser.parse(bean_file.read_text())
18 |     entries, tail_comments = collect_entries(tree)
19 |     # TODO: assert more stuff here
20 |     assert len(entries) == 12
21 |     assert len(tail_comments) == 1
22 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_option.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def option_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="option", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         'option "key" "value"',
17 |     ],
18 | )
19 | def test_parse_option(option_parser, text: str):
20 |     option_parser.parse(text)
21 | 
22 | 
23 | @pytest.mark.parametrize(
24 |     "text",
25 |     [
26 |         'option "key"',
27 |         'option key "value"',
28 |         "option 'key' 'value'",
29 |     ],
30 | )
31 | def test_parse_bad_option(option_parser, text: str):
32 |     with pytest.raises(UnexpectedInput):
33 |         option_parser.parse(text)
34 | 


--------------------------------------------------------------------------------
/tests/grammar/terminals/test_tag.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedCharacters
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def tag_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="tag", rule="TAG")
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "#abc",
17 |         "#a123",
18 |         "#this-is-fine",
19 |         "#this.is.also.fine",
20 |         "#so_is_this",
21 |         "#and/this"
22 |     ],
23 | )
24 | def test_parse_tag(tag_parser: Lark, text: str):
25 |     tag_parser.parse(text)
26 | 
27 | 
28 | @pytest.mark.parametrize(
29 |     "text",
30 |     ["@123", "#", "abc"],
31 | )
32 | def test_parse_bad_tag(tag_parser: Lark, text: str):
33 |     with pytest.raises(UnexpectedCharacters):
34 |         tag_parser.parse(text)
35 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_note.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def note_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="note", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         '2022-03-31 note Assets:Foo "this account looks good"',
17 |     ],
18 | )
19 | def test_parse_note(note_parser: Lark, text: str):
20 |     note_parser.parse(text)
21 | 
22 | 
23 | @pytest.mark.parametrize(
24 |     "text",
25 |     [
26 |         'note Assets:Foo "this account looks good"',
27 |         "2022-03-31 note Assets:Foo",
28 |     ],
29 | )
30 | def test_parse_bad_note(note_parser: Lark, text: str):
31 |     with pytest.raises(UnexpectedInput):
32 |         note_parser.parse(text)
33 | 


--------------------------------------------------------------------------------
/tests/grammar/terminals/test_link.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedCharacters
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def link_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="link", rule="LINK", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "^abc",
17 |         "^a123",
18 |         "^this-is-fine",
19 |         "^this.is.also.fine",
20 |         "^so_is_this",
21 |         "^and/this"
22 |     ],
23 | )
24 | def test_parse_link(link_parser: Lark, text: str):
25 |     link_parser.parse(text)
26 | 
27 | 
28 | @pytest.mark.parametrize(
29 |     "text",
30 |     ["@123", "#", "abc"],
31 | )
32 | def test_parse_link_tag(link_parser: Lark, text: str):
33 |     with pytest.raises(UnexpectedCharacters):
34 |         link_parser.parse(text)
35 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_include.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def include_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="include", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         'include "/path/to/file.bean"',
17 |     ],
18 | )
19 | def test_parse_include(include_parser: Lark, text: str):
20 |     include_parser.parse(text)
21 | 
22 | 
23 | @pytest.mark.parametrize(
24 |     "text",
25 |     [
26 |         'INCLUDE "/path/to/file.bean"',
27 |         "include '/path/to/file.bean'" "include /path/to/file.bean",
28 |     ],
29 | )
30 | def test_parse_bad_include(include_parser: Lark, text: str):
31 |     with pytest.raises(UnexpectedInput):
32 |         include_parser.parse(text)
33 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_plugin.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def plugin_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="plugin", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         'plugin "beancount.plugin"',
17 |         'plugin "beancount.plugin" "config"',
18 |     ],
19 | )
20 | def test_parse_plugin(plugin_parser, text: str):
21 |     plugin_parser.parse(text)
22 | 
23 | 
24 | @pytest.mark.parametrize(
25 |     "text",
26 |     [
27 |         'plugin beancount.plugin "value"',
28 |         "plugin 'beancount.plugin' 'value'",
29 |     ],
30 | )
31 | def test_parse_bad_plugin(plugin_parser, text: str):
32 |     with pytest.raises(UnexpectedInput):
33 |         plugin_parser.parse(text)
34 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_document.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def document_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="document", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         '2022-03-31 document Assets:Bank "/path/to/the/file.pdf"',
17 |     ],
18 | )
19 | def test_parse_document(document_parser: Lark, text: str):
20 |     document_parser.parse(text)
21 | 
22 | 
23 | @pytest.mark.parametrize(
24 |     "text",
25 |     [
26 |         'document Assets:Bank "this account looks good"',
27 |         "2022-03-31 document Assets:Bank",
28 |     ],
29 | )
30 | def test_parse_bad_document(document_parser: Lark, text: str):
31 |     with pytest.raises(UnexpectedInput):
32 |         document_parser.parse(text)
33 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_event.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def event_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="event", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         '2022-03-31 event "employer" "Launch Platform LLC"',
17 |         '2022-03-31 event "location" "San Francisco"',
18 |     ],
19 | )
20 | def test_parse_event(event_parser: Lark, text: str):
21 |     event_parser.parse(text)
22 | 
23 | 
24 | @pytest.mark.parametrize(
25 |     "text",
26 |     [
27 |         'event "foo" "this account looks good"',
28 |         '2022-03-31 event "foo"',
29 |     ],
30 | )
31 | def test_parse_bad_event(event_parser: Lark, text: str):
32 |     with pytest.raises(UnexpectedInput):
33 |         event_parser.parse(text)
34 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_price.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def price_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="price", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "2022-03-31 price BTC 12.34 USD",
17 |     ],
18 | )
19 | def test_parse_price(price_parser: Lark, text: str):
20 |     price_parser.parse(text)
21 | 
22 | 
23 | @pytest.mark.parametrize(
24 |     "text",
25 |     [
26 |         "price USD 12.34 USD",
27 |         "2022-03-1 price Assets:Bank 12.34 USD",
28 |         "2022-03 price BTC 12.34 USD",
29 |         "2022-03-01 price BTC 12.34",
30 |     ],
31 | )
32 | def test_parse_bad_price(price_parser: Lark, text: str):
33 |     with pytest.raises(UnexpectedInput):
34 |         price_parser.parse(text)
35 | 


--------------------------------------------------------------------------------
/tests/grammar/terminals/test_date.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedCharacters
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def date_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="date", rule="DATE")
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "2022-03-31",
17 |         "1970-01-01",
18 |         "2022/03/31",
19 |         "1970/01/01",
20 |         "2022-03/31",
21 |         "20222-03-31",
22 |         "1970-1-01",
23 |         "1970-1-1",
24 |     ],
25 | )
26 | def test_parse_date(date_parser: Lark, text: str):
27 |     date_parser.parse(text)
28 | 
29 | 
30 | @pytest.mark.parametrize(
31 |     "text",
32 |     [
33 |         "2022--01"
34 |         "foobar",
35 |     ],
36 | )
37 | def test_parse_invalid_date(date_parser: Lark, text: str):
38 |     with pytest.raises(UnexpectedCharacters):
39 |         date_parser.parse(text)
40 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_close.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def close_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="close", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "2022-03-31 close Assets:Bank",
17 |     ],
18 | )
19 | def test_parse_close(close_parser: Lark, text: str):
20 |     close_parser.parse(text)
21 | 
22 | 
23 | @pytest.mark.parametrize(
24 |     "text",
25 |     [
26 |         "close Assets:Bank",
27 |         "2022-03-31 close Assets",
28 |         "2022-03 close Assets:Bank",
29 |         "2022-03-31 close Assets:Bank USD",
30 |         "2022-03-31 close",
31 |     ],
32 | )
33 | def test_parse_bad_close(close_parser: Lark, text: str):
34 |     with pytest.raises(UnexpectedInput):
35 |         close_parser.parse(text)
36 | 


--------------------------------------------------------------------------------
/tests/grammar/test_cost.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def cost_parser(make_parser: typing.Callable) -> Lark:
10 |     import logging
11 |     from lark import logger
12 | 
13 |     logger.setLevel(level=logging.DEBUG)
14 |     return make_parser(module="beancount", rule="cost", ignore_spaces=True)
15 | 
16 | 
17 | @pytest.mark.parametrize(
18 |     "text",
19 |     [
20 |         "{ 12.34 USD }",
21 |         "{{ 12.34 USD }}",
22 |         "{ 12.34 # 56.78 USD }",
23 |         "{ 12.34 USD, 2024-01-01 }",
24 |     ],
25 | )
26 | def test_parse_cost(cost_parser: Lark, text: str):
27 |     cost_parser.parse(text)
28 | 
29 | 
30 | @pytest.mark.parametrize(
31 |     "text",
32 |     [
33 |         "12.34 USD",
34 |     ],
35 | )
36 | def test_parse_bad_cost(cost_parser: Lark, text: str):
37 |     with pytest.raises(UnexpectedInput):
38 |         cost_parser.parse(text)
39 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_commodity.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def commodity_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="commodity", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "2022-03-31 commodity USD",
17 |         "2022-03-31 commodity BTC",
18 |     ],
19 | )
20 | def test_parse_commodity(commodity_parser: Lark, text: str):
21 |     commodity_parser.parse(text)
22 | 
23 | 
24 | @pytest.mark.parametrize(
25 |     "text",
26 |     [
27 |         "commodity Assets:Bank",
28 |         "2022-03-1 commodity Assets:Bank",
29 |         "2022-03 commodity USD",
30 |         "2022-03-31 commodity Assets:Bank ",
31 |     ],
32 | )
33 | def test_parse_bad_commodity(commodity_parser: Lark, text: str):
34 |     with pytest.raises(UnexpectedInput):
35 |         commodity_parser.parse(text)
36 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_pad.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def pad_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="pad", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "2022-03-31 pad Assets:Bank Equity:Opening-Balances",
17 |     ],
18 | )
19 | def test_parse_pad(pad_parser: Lark, text: str):
20 |     pad_parser.parse(text)
21 | 
22 | 
23 | @pytest.mark.parametrize(
24 |     "text",
25 |     [
26 |         "pad Assets:Bank",
27 |         "2022-03-1 pad Assets:Bank",
28 |         "2022-03 pad Assets:Bank USD",
29 |         "2022-03-31 pad Assets:Bank 123",
30 |         '2022-03-31 pad USD,BTC "STRICT"',
31 |         '2022-03-31 pad Foobar USD,BTC "NONE"',
32 |     ],
33 | )
34 | def test_parse_bad_pad(pad_parser: Lark, text: str):
35 |     with pytest.raises(UnexpectedInput):
36 |         pad_parser.parse(text)
37 | 


--------------------------------------------------------------------------------
/tests/grammar/conftest.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | import typing
 3 | from textwrap import dedent
 4 | 
 5 | import pytest
 6 | from lark import Lark
 7 | 
 8 | from beancount_parser.parser import GRAMMAR_FOLDER
 9 | 
10 | 
11 | @pytest.fixture
12 | def grammar_folder() -> pathlib.Path:
13 |     return GRAMMAR_FOLDER
14 | 
15 | 
16 | @pytest.fixture
17 | def make_parser(grammar_folder: pathlib.Path) -> typing.Callable:  # factory fixture: returns a builder for single-rule Lark parsers
18 |     def _make_parser(module: str, rule: str, ignore_spaces: bool = False):  # `rule` is imported from grammar module `module` and used as the start rule
19 |         ignore_statement = ""
20 |         if ignore_spaces:  # have the generated grammar skip inline whitespace between tokens
21 |             ignore_statement = "\n".join(
22 |                 ["%import common.WS_INLINE", "%ignore WS_INLINE"]
23 |             )
24 |         return Lark(
25 |             dedent(
26 |                 f"""\
27 |         start: {rule}
28 |         %import .{module}.{rule}
29 |         {ignore_statement}
30 |         """
31 |             ),
32 |             import_paths=[grammar_folder],  # where the relative %import above is looked up
33 |             parser="lalr",
34 |             debug=True,
35 |         )
36 | 
37 |     return _make_parser
38 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Launch Platform LLC
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/tests/grammar/terminals/test_currency.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedCharacters
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def currency_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="currency", rule="CURRENCY", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "A",
17 |         "AB",
18 |         "USD",
19 |         "ABC",
20 |         "A2BC",
21 |         "A2-3BC",
22 |         "A.B",
23 |         "A_B",
24 |         "A-B",
25 |         "A-B-C",
26 |         "INF879O01027",
27 |         "CONTRACT_HOURS",
28 |     ],
29 | )
30 | def test_parse_currency(currency_parser: Lark, text: str):
31 |     currency_parser.parse(text)
32 | 
33 | 
34 | @pytest.mark.parametrize(
35 |     "text",
36 |     [
37 |         "a",
38 |         "AbC",
39 |         "ABc",
40 |         "@",
41 |         "A@B",
42 |         "INF879O01027-",
43 |         "INF879O01027_",
44 |     ],
45 | )
46 | def test_parse_bad_currency(currency_parser: Lark, text: str):
47 |     with pytest.raises(UnexpectedCharacters):
48 |         currency_parser.parse(text)
49 | 


--------------------------------------------------------------------------------
/tests/fixtures/simple.bean:
--------------------------------------------------------------------------------
 1 | ;; -*- mode: org; mode: beancount; -*-
 2 | ;; this is my personal book
 3 | 
 4 | include "accounts.bean"
 5 | 
 6 | plugin "beancount.plugin"
 7 | 
 8 | option "foo" "bar"
 9 | 
10 | 1970-01-01 commodity USD
11 | 
12 | 1970-01-01 open Assets:Cash USD ; comment for Assets:Cash
13 | 1970-01-01 open Assets:Bank USD ; comment for Assets:Bank
14 | 1970-01-02 open Expenses:Food USD ; comment for Expenses:Food
15 | 1970-01-03 open Expenses:ServiceFee USD
16 | 
17 | 1970-01-01 custom "string val" 123.45 USD TRUE FALSE 2022-04-01 Assets:Bank
18 | 
19 | 2022-04-01 balance Assets:Cash                    -12,345,678.0 USD
20 | 
21 | 2022-04-02 * "Save money into bank" #hash-1 ; comment for save money into bank
22 |   Assets:Cash                                     -12,345,678.0 USD
23 |   Assets:Bank                                      10,000,000.0 USD
24 |   Expenses:ServiceFee
25 | 
26 | ; buy dinner comment
27 | 2022-04-03 * "In-N-Out Burger" "Buy dinner" ^link #hash-0 #hash-1
28 |   document: "mydoc.pdf"
29 |   Assets:Cash                                             -20.0 USD
30 |     invoice: "invoice.pdf"
31 |   Expenses:Food                                            20.0 USD
32 | 
33 | ; hello


--------------------------------------------------------------------------------
/tests/grammar/directives/test_balance.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def balance_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="balance", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "2022-03-31 balance Assets:Bank 12.34 + 23.45 USD",
17 |         "2022-03-31 balance Assets:Bank 45.67 BTC",
18 |         "2022-03-31 balance Assets:Bank 12.34 ~ 0.01 USD",
19 |     ],
20 | )
21 | def test_parse_balance(balance_parser: Lark, text: str):
22 |     balance_parser.parse(text)
23 | 
24 | 
25 | @pytest.mark.parametrize(
26 |     "text",
27 |     [
28 |         "balance Assets:Bank",  # no leading date
29 |         "2022-03-31 balance Assets 12.34 USD",  # account has no ':' component
30 |         "2022-03-1 balance Assets:Bank",
31 |         "2022-03 balance Assets:Bank USD",
32 |         "2022-03-31 balance Assets:Bank 123",  # number without a currency
33 |         "2022-03-31 balance Assets:Bank 12.34 USD ~ 0.01 USD",  # the accepted form is "12.34 ~ 0.01 USD"
34 |     ],
35 | )
36 | def test_parse_bad_open(balance_parser: Lark, text: str):  # NOTE(review): exercises the `balance` rule; the `_open` suffix looks copied from test_open.py - consider renaming to test_parse_bad_balance
37 |     with pytest.raises(UnexpectedInput):
38 |         balance_parser.parse(text)
39 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_open.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def open_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="open", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "2022-03-31 open Assets:Bank",
17 |         "2022-03-31 open Assets:Bank USD",
18 |         "2022-03-31 open Assets:Bank USD,BTC",
19 |         '2022-03-31 open Assets:Bank USD,BTC "STRICT"',
20 |         '2022-03-31 open Assets:Bank "STRICT"',
21 |         '2022-03-31 open Assets:Bank USD,BTC "NONE"',
22 |         '2022-03-1 open Assets:Bank',
23 |     ],
24 | )
25 | def test_parse_open(open_parser: Lark, text: str):
26 |     open_parser.parse(text)
27 | 
28 | 
29 | @pytest.mark.parametrize(
30 |     "text",
31 |     [
32 |         "open Assets:Bank",
33 |         "2022-03 open Assets:Bank USD",
34 |         "2022-03-31 open Assets",
35 |         "2022-03-31 open Assets:Bank 123",
36 |         '2022-03-31 open USD,BTC "STRICT"',
37 |         '2022-03-31 open assets:bank USD,BTC "NONE"',
38 |     ],
39 | )
40 | def test_parse_bad_open(open_parser: Lark, text: str):
41 |     with pytest.raises(UnexpectedInput):
42 |         open_parser.parse(text)
43 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_transaction.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def transaction_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="txn", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "1970-01-01",
17 |         '1970-01-01 * "Foobar"',
18 |         '1970-01-01 ! "Foobar"',
19 |         '1970-01-01 ! "\\"Foobar\\""',
20 |         '1970-01-01 ! "Jane Doe" "Foobar"',
21 |         '1970-01-01 ! "Jane Doe" "Foobar" #hash-tag',
22 |         '1970-01-01 ! "Jane Doe" "Foobar" #hash-tag ^link',
23 |         '1970-01-01 ! "Jane Doe" "Foobar" #hash-tag ^link-1 #hash2',
24 |         '1970-01-01 txn "Jane Doe" "Foobar"',
25 |     ],
26 | )
27 | def test_parse_transaction(transaction_parser: Lark, text: str):
28 |     transaction_parser.parse(text)
29 | 
30 | 
31 | @pytest.mark.parametrize(
32 |     "text",
33 |     [
34 |         '1970-01-01 @ "Foobar"',
35 |         "1970-01-01 ! Foobar",
36 |         '1970-01-01 ! "Jane Doe" Foobar',
37 |         '1970-01-01 TXN "Jane Doe" "Foobar"',
38 |         'TXN "Jane Doe" "Foobar"',
39 |     ],
40 | )
41 | def test_parse_bad_transaction(transaction_parser: Lark, text: str):
42 |     with pytest.raises(UnexpectedInput):
43 |         transaction_parser.parse(text)
44 | 


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | # Use the latest 2.1 version of CircleCI pipeline process engine.
 2 | # See: https://circleci.com/docs/2.0/configuration-reference
 3 | version: 2.1
 4 | 
 5 | orbs:
 6 |   python: circleci/python@2.1.1
 7 | 
 8 | jobs:
 9 |   test:
10 |     docker:
11 |       - image: cimg/python:3.10.11
12 |     steps:
13 |       - checkout
14 |       - python/install-packages:
15 |           pkg-manager: poetry
16 |       - run:
17 |           name: Run test
18 |           command: poetry run python -m pytest ./tests -svvvv
19 |   build-and-publish:
20 |     docker:
21 |       - image: cimg/python:3.10.11
22 |     steps:
23 |       - checkout
24 |       - python/install-packages:
25 |           pkg-manager: poetry
26 |       - run:
27 |           name: config
28 |           command: |
29 |             poetry config http-basic.pypi "__token__" "${POETRY_PYPI_TOKEN_PYPI}"
30 |       - run:
31 |           name: Build
32 |           command: poetry build
33 |       - run:
34 |           name: Publish
35 |           command: poetry publish
36 | 
37 | workflows:
38 |   test:
39 |     jobs:
40 |       - test:
41 |           filters:
42 |             # needed for deploy build
43 |             # ref: https://discuss.circleci.com/t/builds-for-tags-not-triggering/17681/7
44 |             tags:
45 |               only: /^.*/
46 |       - build-and-publish:
47 |           requires:
48 |             - test
49 |           filters:
50 |             branches:
51 |               ignore: /.*/
52 |             tags:
53 |               only: /^.*/
54 | 


--------------------------------------------------------------------------------
/tests/grammar/terminals/test_account.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedCharacters
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def account_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="account", rule="ACCOUNT")
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "Assets:A",
17 |         "Assets:2",
18 |         "Assets:Ab",
19 |         "Assets:AA",
20 |         "Assets:银行",
21 |         "Assets:A银行",
22 |         "Assets:Banks:AMEX",
23 |         "Assets:Banks:WellsFargo",
24 |         "Assets:Banks:Wells-Fargo",
25 |         "Assets:Banks:Chase",
26 |         "Expenses:Housing",
27 |         "Expenses:Travel",
28 |         "Liabilities:CreditCard",
29 |         "Income:Contracting",
30 |         "Income:ProjectNumber8",
31 |         "Equity:My1stHouse",
32 |         "Foobar:Eggs:Spam",
33 |     ],
34 | )
35 | def test_parse_account(account_parser: Lark, text: str):
36 |     account_parser.parse(text)
37 | 
38 | 
39 | @pytest.mark.parametrize(
40 |     "text",
41 |     [
42 |         "Assets",
43 |         "Expenses",
44 |         "Income",
45 |         "Liabilities",
46 |         "Foobar",
47 |         "assets:bank",
48 |         "Assets:bank",
49 |         ":Assets",
50 |         "Assets:",
51 |         "Assets::Banks:AMEX",
52 |         'USD',
53 |     ],
54 | )
55 | def test_parse_bad_account(account_parser: Lark, text: str):
56 |     with pytest.raises(UnexpectedCharacters):
57 |         account_parser.parse(text)
58 | 


--------------------------------------------------------------------------------
/tests/test_parser.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | 
 6 | from beancount_parser.parser import extract_includes
 7 | from beancount_parser.parser import make_parser
 8 | from beancount_parser.parser import traverse
 9 | 
10 | 
11 | @pytest.fixture
12 | def parser() -> Lark:
13 |     return make_parser()
14 | 
15 | 
16 | @pytest.mark.parametrize(
17 |     "filename, expected",
18 |     [
19 |         (
20 |             "includes.bean",
21 |             [
22 |                 ("foo.bean", 2),
23 |                 ("bar.bean", 7),
24 |                 ("2024/*.bean", 8),
25 |             ],
26 |         )
27 |     ],
28 | )
29 | def test_extract_includes(
30 |     parser: Lark, fixtures_folder: pathlib.Path, filename: str, expected: list[tuple[str, int]]
31 | ):
32 |     bean_file = fixtures_folder / filename
33 |     tree = parser.parse(bean_file.read_text())
34 |     assert frozenset(extract_includes(tree)) == frozenset(expected)  # order-insensitive comparison
35 | 
36 | 
37 | @pytest.mark.parametrize(
38 |     "folder, expected",
39 |     [
40 |         ("nested", ["main.bean", "a/file0.bean", "a/b/file1.bean"]),
41 |         ("glob", ["main.bean", "a/file0.bean", "a/b/file1.bean", "a/b/file2.bean"]),
42 |         ("circular", ["main.bean", "file0.bean", "file1.bean"]),
43 |         ("contain_root_dir", ["main.bean"]),
44 |     ],
45 | )
46 | def test_traverse(
47 |     parser: Lark, fixtures_folder: pathlib.Path, folder: str, expected: list[str]
48 | ):
49 |     bean_file = fixtures_folder / "traverse" / folder / "main.bean"
50 |     assert frozenset(
51 |         str(bean_path.relative_to(bean_file.parent).as_posix())
52 |         for bean_path, _ in traverse(parser, bean_file)
53 |     ) == frozenset(expected)
54 | 


--------------------------------------------------------------------------------
/beancount_parser/data_types.py:
--------------------------------------------------------------------------------
 1 | import enum
 2 | import typing
 3 | 
 4 | from lark import Tree
 5 | 
 6 | 
 7 | @enum.unique
 8 | class EntryType(str, enum.Enum):
 9 |     # Date directives
10 |     OPEN = "OPEN"
11 |     CLOSE = "CLOSE"
12 |     BALANCE = "BALANCE"
13 |     EVENT = "EVENT"
14 |     COMMODITY = "COMMODITY"
15 |     DOCUMENT = "DOCUMENT"
16 |     PRICE = "PRICE"
17 |     NOTE = "NOTE"
18 |     PAD = "PAD"
19 |     CUSTOM = "CUSTOM"
20 |     TXN = "TXN"
21 |     # Simple directives
22 |     OPTION = "OPTION"
23 |     INCLUDE = "INCLUDE"
24 |     PLUGIN = "PLUGIN"
25 |     # Other
26 |     COMMENTS = "COMMENTS"
27 |     SECTION_HEADER = "SECTION_HEADER"
28 | 
29 | 
30 | DATE_DIRECTIVE_ENTRY_TYPES = {
31 |     "open": EntryType.OPEN,
32 |     "close": EntryType.CLOSE,
33 |     "balance": EntryType.BALANCE,
34 |     "event": EntryType.EVENT,
35 |     "commodity": EntryType.COMMODITY,
36 |     "document": EntryType.DOCUMENT,
37 |     "price": EntryType.PRICE,
38 |     "note": EntryType.NOTE,
39 |     "pad": EntryType.PAD,
40 |     "custom": EntryType.CUSTOM,
41 |     "txn": EntryType.TXN,
42 | }
43 | SIMPLE_DIRECTIVE_ENTRY_TYPES = {
44 |     "option": EntryType.OPTION,
45 |     "include": EntryType.INCLUDE,
46 |     "plugin": EntryType.PLUGIN,
47 | }
48 | 
49 | 
50 | class Metadata(typing.NamedTuple):
51 |     comments: list[Tree]
52 |     statement: Tree
53 | 
54 | 
55 | class Posting(typing.NamedTuple):
56 |     comments: list[Tree]
57 |     statement: Tree
58 |     metadata: list[Metadata]
59 | 
60 | 
61 | class Entry(typing.NamedTuple):  # one typed entry: its statement plus attached comments, metadata and postings
62 |     type: EntryType
63 |     comments: list[Tree]
64 |     statement: Tree | None
65 |     metadata: list[Metadata]
66 |     postings: list[Posting]
67 | 


--------------------------------------------------------------------------------
/tests/grammar/terminals/test_amount.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def amount_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="amount", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.fixture
14 | def amount_tolerance_parser(make_parser: typing.Callable) -> Lark:
15 |     return make_parser(module="beancount", rule="amount_tolerance", ignore_spaces=True)
16 | 
17 | 
18 | _INVALID_AMOUNT_TOLERANCES = (  # inputs expected to be rejected by both the amount and amount_tolerance parsers
19 |     '12.34 ~ USD',
20 |     '~ 0.01 USD',
21 |     '12.34 0.01 USD',
22 |     '12.34 USD 0.01 USD',
23 |     '12.34 USD ~ 0.01 USD',
24 |     '(12.34 ~ 0.01) USD',
25 |     '12.34 USD ~ 0.01',
26 |     '12.34 USD ~ 0.01 USD',  # NOTE(review): exact duplicate of the entry three lines up; the case is parametrized twice
27 | )
28 | 
29 | @pytest.mark.parametrize(
30 |     "text",
31 |     [
32 |         "12.34 USD",
33 |         "-12.34 USD",
34 |         "0.0 USD",
35 |         "1+2 USD",
36 |         "(1+2) USD",
37 |         "500 BTC",
38 |     ],
39 | )
40 | def test_parse_amount(amount_parser: Lark, text: str):
41 |     amount_parser.parse(text)
42 | 
43 | 
44 | @pytest.mark.parametrize(
45 |     "text",
46 |     _INVALID_AMOUNT_TOLERANCES + (
47 |         "12.34",
48 |         "USD",
49 |         "0..0 USD",
50 |         "1+ USD",
51 |         '12.34 ~ 0.01 USD',
52 |     ),
53 | )
54 | def test_parse_bad_amount(amount_parser: Lark, text: str):
55 |     with pytest.raises(UnexpectedInput):
56 |         amount_parser.parse(text)
57 | 
58 | 
59 | @pytest.mark.parametrize(
60 |     "text",
61 |     [
62 |         '12.34 ~ 0.01 USD',
63 |     ],
64 | )
65 | def test_parse_amount_tolerance(amount_tolerance_parser: Lark, text: str):
66 |     amount_tolerance_parser.parse(text)
67 | 
68 | 
69 | @pytest.mark.parametrize(
70 |     "text",
71 |     _INVALID_AMOUNT_TOLERANCES,
72 | )
73 | def test_parse_bad_amount_tolerance(amount_tolerance_parser: Lark, text: str):
74 |     with pytest.raises(UnexpectedInput):
75 |         amount_tolerance_parser.parse(text)
76 | 


--------------------------------------------------------------------------------
/tests/grammar/directives/test_posting.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedInput
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def posting_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="posting", ignore_spaces=True)
11 | 
12 | 
13 | @pytest.mark.parametrize(
14 |     "text",
15 |     [
16 |         "Assets:Bank 10 USD",
17 |         "Assets:Bank -10 USD",
18 |         "Assets:Bank -10.0 USD",
19 |         "Assets:Bank -10.0 TWD",
20 |         "Assets:Bank -10.0 TWD @ 2.56 USD",
21 |         "Assets:Bank -10.0 TWD @  2.56  USD",
22 |         "Assets:Bank -10.0 TWD @@ 2.56 USD",
23 |         "Assets:Bank -10.0 TWD {100.56 USD}",
24 |         "Assets:Bank -10.0 TWD { 100.56 USD }",
25 |         "Assets:Bank -10.0 TWD {{100.56 USD}}",
26 |         "Assets:Bank -10.0 TWD {{ 100.56  USD}}",
27 |         "Assets:Bank -10.0 TWD {100.56 # 12.34 USD}",
28 |         "Assets:Bank -10.0 TWD { 100.56  #  12.34 USD }",
29 |         "Assets:Bank -10.0 TWD {100.56 # 3.45 CAD }",
30 |         "Assets:Bank -10.0 TWD {100.56 USD, 2021-06-07}",
31 |         "Assets:Bank -10.0 TWD {100.56 USD  , 2021-06-07}",
32 |         "Assets:Bank -10.0 TWD { 100.56 USD , 2021-06-07 }",
33 |         "Assets:Bank -10.0 TWD { 2021-06-07, 100.56 USD }",
34 |         'Assets:Bank -10.0 TWD { 2021-06-07, 100.56 USD, "my-label" }',
35 |         'Assets:Bank -10.0 TWD { 100.56 USD, "my-label", 2021-06-07 }',
36 |         'Assets:Bank -10.0 TWD { "my-label", 2021-06-07, 100.56 USD }',
37 |         'Assets:Bank -10.0 TWD { "my-label", 2021-06-07, 100.56 USD, * }',
38 |         "Assets:Bank -10.0 TWD { * }",
39 |         "Assets:Bank -10.0 TWD {}",
40 |         "! Assets:Bank -10.0 TWD",
41 |         "* Assets:Bank -10.0 TWD",
42 |     ],
43 | )
44 | def test_parse_posting(posting_parser: Lark, text: str):
45 |     posting_parser.parse(text)
46 | 
47 | 
48 | @pytest.mark.parametrize(
49 |     "text",
50 |     [
51 |         "Assets:Bank 10",
52 |         "a -10 USD",
53 |         "@ Assets:Bank -10.0 TWD",
54 |     ],
55 | )
56 | def test_parse_bad_posting(posting_parser: Lark, text: str):
57 |     with pytest.raises(UnexpectedInput):
58 |         posting_parser.parse(text)
59 | 


--------------------------------------------------------------------------------
/tests/grammar/test_metadata.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | import pytest
 4 | from lark import Lark
 5 | from lark.exceptions import UnexpectedCharacters
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def metadata_key_parser(make_parser: typing.Callable) -> Lark:
10 |     return make_parser(module="beancount", rule="METADATA_KEY")
11 | 
12 | 
13 | @pytest.fixture
14 | def metadata_value_parser(make_parser: typing.Callable) -> Lark:
15 |     return make_parser(module="beancount", rule="metadata_value", ignore_spaces=True)
16 | 
17 | 
18 | @pytest.fixture
19 | def metadata_item_parser(make_parser: typing.Callable) -> Lark:
20 |     return make_parser(module="beancount", rule="metadata_item", ignore_spaces=True)
21 | 
22 | 
23 | @pytest.mark.parametrize(
24 |     "text",
25 |     [
26 |         "a",
27 |         "abc",
28 |         "abc-DEF",
29 |         "abc_DEF",
30 |         "abc123",
31 |         "aBC",
32 |     ],
33 | )
34 | def test_parse_metadata_key(metadata_key_parser: Lark, text: str):
35 |     metadata_key_parser.parse(text)
36 | 
37 | 
38 | @pytest.mark.parametrize(
39 |     "text",
40 |     [
41 |         "0",
42 |         "0abc",
43 |         "Abc",
44 |         "_abc",
45 |         "_ABC",
46 |         "_Abc",
47 |     ],
48 | )
49 | def test_parse_bad_metadata_key(metadata_key_parser: Lark, text: str):
50 |     with pytest.raises(UnexpectedCharacters):
51 |         metadata_key_parser.parse(text)
52 | 
53 | 
54 | @pytest.mark.parametrize(
55 |     "text",
56 |     [
57 |         '"String value"',
58 |         "2020-03-31",
59 |         "12.34",
60 |         "12.34 USD",
61 |         "USD",
62 |         "#foobar",
63 |         "#foo #bar",
64 |     ],
65 | )
66 | def test_parse_metadata_value(metadata_value_parser: Lark, text: str):
67 |     metadata_value_parser.parse(text)
68 | 
69 | 
70 | @pytest.mark.parametrize(
71 |     "text",
72 |     [
73 |         "abc",
74 |         "_abc",
75 |         "_ABC",
76 |         "_Abc",
77 |     ],
78 | )
79 | def test_parse_bad_metadata_value(metadata_value_parser: Lark, text: str):
80 |     with pytest.raises(UnexpectedCharacters):
81 |         metadata_value_parser.parse(text)
82 | 
83 | 
84 | @pytest.mark.parametrize(
85 |     "text",
86 |     [
87 |         'foo: "String value"',
88 |         "bar: 2020-03-31",
89 |         "eggs: 12.34",
90 |     ],
91 | )
92 | def test_parse_metadata_item(metadata_item_parser: Lark, text: str):
93 |     metadata_item_parser.parse(text)
94 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | .idea
131 | 


--------------------------------------------------------------------------------
/tests/grammar/terminals/test_numbers.py:
--------------------------------------------------------------------------------
  1 | import typing
  2 | 
  3 | import pytest
  4 | from lark import Lark
  5 | from lark.exceptions import UnexpectedInput
  6 | 
  7 | 
  8 | @pytest.fixture
  9 | def number_parser(make_parser: typing.Callable) -> Lark:
 10 |     return make_parser(module="numbers", rule="NUMBER")
 11 | 
 12 | 
 13 | @pytest.fixture
 14 | def number_expr_parser(make_parser: typing.Callable) -> Lark:
 15 |     return make_parser(module="beancount", rule="number_expr")
 16 | 
 17 | 
 18 | @pytest.mark.parametrize(
 19 |     "text",
 20 |     [
 21 |         "1",
 22 |         "12",
 23 |         "123",
 24 |         "1234",
 25 |         "4578",
 26 |         "4,578",
 27 |         "4,578.1234",
 28 |         "1,123,578.1234",
 29 |         "12.34",
 30 |         ".34",
 31 |         "0.0",
 32 |     ],
 33 | )
 34 | def test_parse_number(number_parser: Lark, text: str):
 35 |     number_parser.parse(text)
 36 | 
 37 | 
 38 | @pytest.mark.parametrize(
 39 |     "text",
 40 |     [
 41 |         "-0",
 42 |         "1000,000",
 43 |         "1000,00",
 44 |         "1,0000",
 45 |         "+1234",
 46 |         "-5.67",
 47 |         "4578.123.45",
 48 |         "0x12.34",
 49 |         "..34",
 50 |         "0.0.",
 51 |         "abc",
 52 |     ],
 53 | )
 54 | def test_parse_bad_number(number_parser: Lark, text: str):
 55 |     with pytest.raises(UnexpectedInput):
 56 |         number_parser.parse(text)
 57 | 
 58 | 
 59 | @pytest.mark.parametrize(
 60 |     "text",
 61 |     [
 62 |         "0",
 63 |         "1234",
 64 |         "4578",
 65 |         "12.34",
 66 |         "12.34",  # NOTE(review): exact duplicate of the previous case
 67 |         ".34",
 68 |         "0.0",
 69 |         "-5",
 70 |         "-5.67",
 71 |         "+5.67",
 72 |         "1.2+3.4",
 73 |         "1.2-3.4",
 74 |         "1.2*3.4",
 75 |         "1.2/3.4",
 76 |         "1++2",
 77 |         "1--2",
 78 |         "++1",
 79 |         "--1",
 80 |         "(1+2)*3/4",
 81 |         "((1))",
 82 |         "--((1))",
 83 |         "1+.2",
 84 |     ],
 85 | )
 86 | def test_parse_number_expr(number_expr_parser: Lark, text: str):  # passes when parsing raises nothing; the resulting tree is not inspected
 87 |     number_expr_parser.parse(text)
 88 | 
 89 | 
 90 | @pytest.mark.parametrize(
 91 |     "text",
 92 |     [
 93 |         "0a",
 94 |         "1234..",
 95 |         "..4578",
 96 |         "12..34",
 97 |         "abc",
 98 |         "1-",
 99 |         "2+",
100 |         "(1",
101 |         "1)",
102 |         ")1(",
103 |         "(1)(2)",
104 |         "+",
105 |         "-",
106 |     ]
107 | )
108 | def test_parse_bad_number_expr(number_expr_parser: Lark, text: str):
109 |     with pytest.raises(UnexpectedInput):
110 |         number_expr_parser.parse(text)
111 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # beancount-parser [![CircleCI](https://circleci.com/gh/LaunchPlatform/beancount-parser/tree/master.svg?style=svg)](https://circleci.com/gh/LaunchPlatform/beancount-parser/tree/master)
 2 | Standalone [Lark](https://github.com/lark-parser/lark) LALR(1) based Beancount syntax parser (not relying on Beancount library), MIT license
 3 | 
 4 | Please also check out [beancount-black](https://github.com/LaunchPlatform/beancount-black), an opinionated beancount code formatter based on beancount-parser.
 5 | 
 6 | ## Features
 7 | 
 8 | - **MIT licensed** - the only dependency is [Lark](https://github.com/lark-parser/lark)
 9 | - **Extremely fast** - LALR(1) is used
10 | - **Section awareness** - Emacs org-mode heading markers (`*`) will be parsed
11 | - **Comment awareness** - comments will be parsed
12 | - **Not a validator** - it does not validate beancount syntax, invalid beancount syntax may still pass the parsing
13 | 
14 | # Sponsor
15 | 
16 | The original project beancount-parser was meant to be an internal tool built by [Launch Platform LLC](https://launchplatform.com) for 
17 | 
18 | <p align="center">
19 |   <a href="https://beanhub.io"><img src="https://github.com/LaunchPlatform/beancount-black/raw/master/assets/beanhub.svg?raw=true" alt="BeanHub logo" /></a>
20 | </p>
21 | 
22 | A modern accounting book service based on the most popular open source version control system [Git](https://git-scm.com/) and text-based double entry accounting book software [Beancount](https://beancount.github.io/docs/index.html).
23 | We realized adding new entries with BeanHub automatically over time makes the beancount file a mess.
24 | So, a strong code formatter is needed.
25 | While SaaS businesses aren't required to open-source an internal tool like this, we recognize that our service is only possible because of the open-source tools we are using.
26 | It would be greatly beneficial for the community to access a tool like this, so we've decided to open-source it under an MIT license. We hope you find this tool useful 😄
27 | 
28 | ## Install
29 | 
30 | To install the parser, simply run
31 | 
32 | ```bash
33 | pip install beancount-parser
34 | ```
35 | 
36 | ## Usage
37 | 
38 | If you want to parse beancount code, you can do this:
39 | 
40 | ```python
41 | import io
42 | 
43 | from beancount_parser.parser import make_parser
44 | 
45 | parser = make_parser()
46 | tree = parser.parse(beancount_content)
47 | # do whatever you want with the tree here
48 | ```
49 | 
50 | ## Feedback
51 | 
52 | Feedback, bug reports, and feature requests are welcome 🙌, just please open an issue.
53 | No guarantee we have time to deal with them, but will see what we can do.
54 | 


--------------------------------------------------------------------------------
/beancount_parser/helpers.py:
--------------------------------------------------------------------------------
 1 | from lark import Token
 2 | from lark import Tree
 3 | 
 4 | from .data_types import DATE_DIRECTIVE_ENTRY_TYPES
 5 | from .data_types import Entry
 6 | from .data_types import EntryType
 7 | from .data_types import Metadata
 8 | from .data_types import Posting
 9 | from .data_types import SIMPLE_DIRECTIVE_ENTRY_TYPES
10 | 
11 | 
def get_entry_type(statement: Tree) -> EntryType:
    """Return the entry type of a directive statement.

    The first child of ``statement`` must be a ``date_directive`` or a
    ``simple_directive`` tree. The ``data.value`` of that tree's own first
    child is used as the key into the matching lookup table.

    Raises ``ValueError`` when the first child is neither kind of directive.
    """
    directive: Tree = statement.children[0]
    kind = directive.data
    if kind != "date_directive" and kind != "simple_directive":
        raise ValueError(f"Unexpected first child type {kind}")
    # Pick the table matching the directive family, then look up the rule name
    if kind == "date_directive":
        lookup = DATE_DIRECTIVE_ENTRY_TYPES
    else:
        lookup = SIMPLE_DIRECTIVE_ENTRY_TYPES
    return lookup[directive.children[0].data.value]
20 | 
21 | 
def collect_entries(tree: Tree) -> tuple[list[Entry], list[Tree]]:
    """Group the flat statements of a parsed tree into entries.

    Walks ``tree.children`` in order, skipping ``None`` children:

    - Comment statements are buffered and handed to the next entry, posting
      or metadata item that follows them.
    - A section header or a directive starts a new ``Entry``.
    - A posting is appended to the most recent entry, which must be a
      transaction.
    - A metadata item is attached to the last posting of the most recent
      entry, or to the entry itself when it has no postings yet.

    Returns a ``(entries, comments)`` tuple, where ``comments`` holds the
    comment statements left over after the last entry.

    Raises ``ValueError`` when a child is not a ``statement``, when an
    unexpected token is found, when a posting follows a non-transaction
    entry, or when a posting or metadata item appears before any entry.
    """
    entries: list[Entry] = []
    comments: list[Tree] = []
    for statement in tree.children:
        if statement is None:
            continue
        if statement.data != "statement":
            raise ValueError("Expected statement here")
        first_child = statement.children[0]
        if isinstance(first_child, Token):
            if first_child.type == "COMMENT":
                # Keep buffering until something claims these comments
                comments.append(statement)
                continue
            if first_child.type != "SECTION_HEADER":
                raise ValueError(f"Unexpected token {first_child.type}")
            entry_type = EntryType.SECTION_HEADER
        elif first_child.data in ("posting", "metadata_item"):
            # Both kinds attach to an existing entry; fail with a clear error
            # instead of a bare IndexError when there is none yet.
            if not entries:
                raise ValueError(
                    f"Unexpected {first_child.data} without a preceding entry"
                )
            last_entry = entries[-1]
            if first_child.data == "posting":
                if last_entry.type != EntryType.TXN:
                    raise ValueError("Transaction expected")
                last_entry.postings.append(
                    Posting(comments=comments, statement=statement, metadata=[])
                )
            else:
                metadata = Metadata(comments=comments, statement=statement)
                if last_entry.postings:
                    last_posting: Posting = last_entry.postings[-1]
                    last_posting.metadata.append(metadata)
                else:
                    last_entry.metadata.append(metadata)
            # The buffered comments now belong to the posting/metadata item
            comments = []
            continue
        else:
            entry_type = get_entry_type(statement)
        entries.append(
            Entry(
                type=entry_type,
                comments=comments,
                statement=statement,
                metadata=[],
                postings=[],
            )
        )
        # Rebind rather than clear: the entry keeps the list it was given
        comments = []
    return entries, comments
76 | 


--------------------------------------------------------------------------------
/beancount_parser/parser.py:
--------------------------------------------------------------------------------
 1 | import glob
 2 | import json
 3 | import logging
 4 | import pathlib
 5 | import typing
 6 | 
 7 | from lark import Lark
 8 | from lark import Tree
 9 | 
# Directory holding the Lark grammar files, located next to this module
GRAMMAR_FOLDER = pathlib.Path(__file__).parent / "grammar"
# Top-level grammar file opened by make_parser()
BEANCOUNT_GRAMMAR_FILE = GRAMMAR_FOLDER / "beancount.lark"
12 | 
13 | 
def make_parser(**options: typing.Any) -> Lark:
    """Build a Lark parser from the bundled beancount grammar file.

    Position propagation and the LALR parser are enabled by default; any
    keyword in ``options`` is forwarded to ``Lark`` and overrides the
    default of the same name.
    """
    settings: dict[str, typing.Any] = {
        "propagate_positions": True,
        "parser": "lalr",
    }
    settings.update(options)
    # Hand Lark the open file object itself rather than its text
    with open(BEANCOUNT_GRAMMAR_FILE, "rt") as grammar_file:
        return Lark(grammar=grammar_file, **settings)
18 | 
19 | 
def extract_includes(tree: Tree) -> typing.Generator[tuple[str, int], None, None]:
    """Yield ``(path, line)`` for every include statement in the root tree.

    ``tree`` must be the ``start`` tree. ``path`` is the include's string
    literal decoded with ``json.loads``; ``line`` is the line recorded in the
    directive's position metadata.

    Raises ``ValueError`` if ``tree`` is not the ``start`` tree or one of its
    children is not a ``statement``.
    """
    if tree.data != "start":
        raise ValueError("Expected start")
    for statement in tree.children:
        if statement is None:
            continue
        if statement.data != "statement":
            raise ValueError("Expected statement")
        directive = statement.children[0]
        # Tokens (comments, section headers) and date directives cannot be includes
        if not (isinstance(directive, Tree) and directive.data == "simple_directive"):
            continue
        target = directive.children[0]
        if target.data == "include":
            quoted_path = target.children[0].value
            yield json.loads(quoted_path), directive.meta.line
38 | 
39 | 
def traverse(
    parser: Lark,
    bean_file: pathlib.Path,
    root_dir: typing.Optional[pathlib.Path] = None,
) -> typing.Generator[tuple[pathlib.Path, Tree], None, None]:
    """Traverse a given bean file and follow all its includes, yield (path, parsed_tree) tuples.

    Files are processed breadth-first starting from ``bean_file``. Each
    include value is joined to the including file's folder and expanded with
    ``glob.glob``. Every file is parsed and yielded at most once, even when
    it is included from several places or the includes form a cycle.

    ``parser`` is the Lark parser used for every file. ``root_dir`` is the
    folder that included files must live under; it defaults to the folder of
    ``bean_file``. Matches outside of it are logged and ignored.
    """
    logger = logging.getLogger(__name__)

    if root_dir is None:
        root_dir = bean_file.parent.absolute()
    # Matched files are fully resolved below, so the root has to be resolved
    # too; a relative or symlinked root would otherwise never show up in
    # ``matched_file.parents`` and every include would be rejected.
    root_dir = root_dir.resolve().absolute()

    start_file = bean_file.absolute()
    pending_files = [start_file]
    # Files are marked as visited when queued, not when parsed, so a file
    # included twice before its turn comes up is still only yielded once.
    # The resolved form of the start file is recorded as well because
    # includes pointing back at it are compared in resolved form.
    visited_bean_files: set[pathlib.Path] = {
        start_file,
        start_file.resolve().absolute(),
    }

    while pending_files:
        current_file = pending_files.pop(0)
        # Read as UTF-8 explicitly instead of relying on the locale default
        tree = parser.parse(current_file.read_text(encoding="utf-8"))
        yield current_file, tree
        for include, lineno in extract_includes(tree):
            logger.info(
                "Process include at %s:%s with path value %s",
                current_file,
                lineno,
                include,
            )
            target_file = current_file.parent / include
            for matched in glob.glob(str(target_file)):
                matched_file = pathlib.Path(matched).resolve().absolute()
                if root_dir not in matched_file.parents:
                    logger.warning(
                        "Matched file %s is not a sub-path of root %s, ignored",
                        matched_file,
                        root_dir,
                    )
                    # ensure include cannot go above the root folder, to avoid any potential security risk
                    continue
                if matched_file in visited_bean_files:
                    continue
                visited_bean_files.add(matched_file)
                pending_files.append(matched_file)
78 | 


--------------------------------------------------------------------------------
/tests/grammar/test_beancount.py:
--------------------------------------------------------------------------------
  1 | from textwrap import dedent
  2 | 
  3 | import pytest
  4 | from lark import Lark
  5 | 
  6 | from beancount_parser.parser import make_parser
  7 | 
  8 | 
@pytest.fixture
def parser() -> Lark:
    """Provide a parser built by ``make_parser`` with its default options."""
    return make_parser()
 12 | 
 13 | 
 14 | @pytest.mark.parametrize(
 15 |     "text",
 16 |     [
 17 |         dedent(
 18 |             """\
 19 |     1970-01-01 commodity USD
 20 |     1970-01-01 open Assets:MyBank USD
 21 |     
 22 |     option "foo" "bar"
 23 |     include "2022.bean"
 24 |     plugin "beancount.module"
 25 |     
 26 |     """
 27 |         ),
 28 |         dedent(
 29 |             """\
 30 |     2022-03-31 * "Foobar"
 31 |         Assets:MyBank 12.34 USD ; this is fine
 32 |     """
 33 |         ),
 34 |         dedent(
 35 |             """\
 36 |     2022-03-31 * "Foobar"
 37 |         Assets:MyBank 12.34 USD ; this is fine"""
 38 |         ),
 39 |         dedent(
 40 |             """\
 41 |     1970-01-01 commodity USD
 42 |     1970-01-01 open Assets:MyBank USD
 43 |     1970-01-01 note Assets:MyBank "this is my first bank account"
 44 |     1970-01-01 open Assets:My2ndBank USD,BTC
 45 |     1970-01-01 close Assets:My2ndBank
 46 |     1970-01-01 price BTC 100.0 USD
 47 |     1970-01-01 commodity USD
 48 |     1970-01-01 custom "string val" 123.45 USD TRUE FALSE 2022-04-01 Assets:Bank
 49 |     
 50 |        ; comment
 51 |     ; comment
 52 |     1970-01-01 event "job" "working from home"
 53 | 
 54 |     * org
 55 |     
 56 |     2022-03-31
 57 |         Assets:MyBank 12.34 USD
 58 |         
 59 |     2022-03-31 "foo" "bar"
 60 |         Assets:MyBank 12.34 USD
 61 |         
 62 |     ** org2
 63 |     
 64 |     2022-03-31 * "Foobar"
 65 |         document: "foobar.pdf" ; my doc
 66 |         document-2: "egg-spam.pdf"
 67 |         Assets:MyBank 12.34 USD ; this is fine
 68 |             document: "invoice.pdf"
 69 |             source: "invoice.pdf"
 70 | 
 71 |     """
 72 |         ),
 73 |         dedent(
 74 |             """\
 75 |         ;; -*- mode: org; mode: beancount; -*-
 76 | 
 77 |         """
 78 |         ),
 79 |         dedent(
 80 |             """\
 81 |     
 82 |     
 83 |        
 84 |     1970-01-01 open Assets:MyBank USD
 85 |     
 86 |     
 87 |     
 88 |     1970-01-01 close Assets:MyBank
 89 |     
 90 |     """
 91 |         ),
 92 |         # for multi-line plugin config issue
 93 |         # ref: https://github.com/LaunchPlatform/beancount-parser/issues/11
 94 |         dedent(
 95 |             """\
 96 |         plugin "plugins.zerosum" "{
 97 |           'zerosum_accounts': {
 98 |             'Equity:Transfers': ('Equity:Transfers:Matched', 7),
 99 |           },
100 |           'flag_unmatched': True
101 |         }"
102 |         """
103 |         ),
104 |         # ensure empty spaces in line works
105 |         '2022-04-20 * "First transaction"\n  Assets:Account1  -1 USD\n  Assets:Account2     1 USD\n   \n2022-04-20 * "Second transaction"\n   Assets:Account1 1 USD\n   Assets:Account2  -1 USD',
106 |         "\n\n\n",
107 |         "",
108 |         # test forecast transactions
109 |         dedent(
110 |             """\
111 |         2014-03-08 # "Electricity bill [MONTHLY]"
112 |             Expenses:Electricity                      50.10 USD
113 |             Assets:Checking                          -50.10 USD
114 |             """
115 |         ),
116 |         # test transaction flags
117 |         dedent(
118 |             """\
119 |         2014-03-08 * "foo" "bar"
120 |         2014-03-08 ! "foo" "bar"
121 |         2014-03-08 P "foo" "bar"
122 |         2014-03-08 S "foo" "bar"
123 |         2014-03-08 T "foo" "bar"
124 |         2014-03-08 C "foo" "bar"
125 |         2014-03-08 U "foo" "bar"
126 |         2014-03-08 R "foo" "bar"
127 |         2014-03-08 M "foo" "bar"
128 |             """
129 |         ),
130 |     ],
131 | )
def test_parse(parser: Lark, text: str) -> None:
    """Check that each parametrized beancount snippet parses without raising."""
    parser.parse(text)
134 | 


--------------------------------------------------------------------------------
/beancount_parser/grammar/beancount.lark:
--------------------------------------------------------------------------------
  1 | // Number
  2 | 
  3 | // Reduce priority of unary operators to align with the beancount behavior that
  4 | // "2000-01-01 custom 123 -456" is interpreted as "2000-01-01 custom -333"
  5 | 
  6 | UNARY_OP.-10: "+" | "-"
  7 | ADD_OP: "+" | "-"
  8 | MUL_OP: "*" | "/"
  9 | 
 10 | ASTERISK: "*"
 11 | 
 12 | number_expr: number_add_expr
 13 | ?number_add_expr: number_mul_expr (ADD_OP number_mul_expr)*
 14 | ?number_mul_expr: number_atom (MUL_OP number_atom)*
 15 | ?number_atom: NUMBER | "(" number_add_expr ")" | UNARY_OP number_atom
 16 | 
 17 | currencies: CURRENCY ("," CURRENCY)*
 18 | amount: number_expr CURRENCY
 19 | amount_tolerance: number_expr "~" number_expr CURRENCY
 20 | 
 21 | // Metadata
 22 | METADATA_KEY: LCASE_LETTER (LETTER | DIGIT | "-" | "_")*
 23 | ?metadata_value: ESCAPED_STRING
 24 |                     | ACCOUNT
 25 |                     | CURRENCY
 26 |                     | DATE
 27 |                     | number_expr
 28 |                     | TAGS
 29 |                     | amount
 30 | metadata_item: METADATA_KEY ":" metadata_value
 31 | 
 32 | // Date directives
 33 | ?annotation_item: TAG | LINK
 34 | annotations: annotation_item+
 35 | 
 36 | ?custom_arg: DATE
 37 |     | ACCOUNT
 38 |     | ESCAPED_STRING
 39 |     | number_expr
 40 |     | CURRENCY
 41 |     | BOOLEAN
 42 | 
 43 | // make it a token so that it will present in the `children`, otherwise the number
 44 | // of child for txn will be different makes it harder to process
 45 | TXN: "txn"
 46 | 
 47 | open:      DATE "open" ACCOUNT [currencies] [ESCAPED_STRING]
 48 | close:     DATE "close" ACCOUNT
 49 | balance:   DATE "balance" ACCOUNT (amount | amount_tolerance)
 50 | event:     DATE "event" ESCAPED_STRING ESCAPED_STRING
 51 | commodity: DATE "commodity" CURRENCY
 52 | document:  DATE "document" ACCOUNT ESCAPED_STRING
 53 | note:      DATE "note" ACCOUNT ESCAPED_STRING
 54 | pad:       DATE "pad" ACCOUNT ACCOUNT
 55 | price:     DATE "price" CURRENCY amount
 56 | custom:    DATE "custom" custom_arg+
 57 | txn:       DATE [TXN | FLAG] [[ESCAPED_STRING] ESCAPED_STRING] [annotations]
 58 | 
 59 | date_directive: open
 60 |                 | close
 61 |                 | balance
 62 |                 | event
 63 |                 | commodity
 64 |                 | document
 65 |                 | note
 66 |                 | pad
 67 |                 | price
 68 |                 | custom
 69 |                 | txn
 70 | 
 71 | // Simple directives
 72 | option: "option" ESCAPED_STRING ESCAPED_STRING
 73 | include: "include" ESCAPED_STRING
 74 | plugin: "plugin" ESCAPED_STRING [ESCAPED_STRING]
 75 | 
 76 | simple_directive: option
 77 |                  | include
 78 |                  | plugin
 79 | 
 80 | // Posting
 81 | total_cost: "{{" amount "}}"
 82 | both_cost: "{" number_expr  "#" amount "}"
 83 | cost_spec: "{" (cost_item ("," cost_item)*)? "}"
 84 | cost_item: amount | DATE | ESCAPED_STRING | ASTERISK
 85 | ?cost: total_cost | both_cost | cost_spec
 86 | 
 87 | per_unit_price: "@" amount
 88 | total_price: "@@" amount
 89 | ?posting_price: per_unit_price | total_price
 90 | 
 91 | detailed_posting: [FLAG] ACCOUNT amount [cost] [posting_price]
 92 | // the special case where only Account is present
 93 | simple_posting: [FLAG] ACCOUNT
 94 | posting: detailed_posting | simple_posting
 95 | 
 96 | ?directive: date_directive
 97 |         | simple_directive
 98 |         | posting
 99 |         | metadata_item
100 | _EMPTY_LINE: /[ \t]*[\r\n]/
101 | statement: directive [COMMENT] | COMMENT | SECTION_HEADER
102 | start: _EMPTY_LINE* (statement _NL+)* [statement]
103 | 
104 | %import common.WS_INLINE -> _WS
105 | %import common.NEWLINE -> _NL
106 | %import common.LCASE_LETTER
107 | %import common.LETTER
108 | %import common.DIGIT
109 | 
110 | %import .comment.COMMENT
111 | %import .section_header.SECTION_HEADER
112 | %import .numbers.NUMBER
113 | %import .account.ACCOUNT
114 | %import .flag.FLAG
115 | %import .currency.CURRENCY
116 | %import .date.DATE
117 | %import .tag.TAGS
118 | %import .tag.TAG
119 | %import .link.LINK
120 | %import .boolean.BOOLEAN
121 | %import .escaped_string.ESCAPED_STRING
122 | 
123 | %ignore _WS
124 | 


--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
  1 | [[package]]
  2 | name = "attrs"
  3 | version = "22.1.0"
  4 | description = "Classes Without Boilerplate"
  5 | category = "dev"
  6 | optional = false
  7 | python-versions = ">=3.5"
  8 | 
  9 | [package.extras]
 10 | dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"]
 11 | docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
 12 | tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "cloudpickle"]
 13 | tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "cloudpickle"]
 14 | 
 15 | [[package]]
 16 | name = "colorama"
 17 | version = "0.4.5"
 18 | description = "Cross-platform colored terminal text."
 19 | category = "dev"
 20 | optional = false
 21 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
 22 | 
 23 | [[package]]
 24 | name = "iniconfig"
 25 | version = "1.1.1"
 26 | description = "iniconfig: brain-dead simple config-ini parsing"
 27 | category = "dev"
 28 | optional = false
 29 | python-versions = "*"
 30 | 
 31 | [[package]]
 32 | name = "lark"
 33 | version = "1.1.2"
 34 | description = "a modern parsing library"
 35 | category = "main"
 36 | optional = false
 37 | python-versions = "*"
 38 | 
 39 | [package.extras]
 40 | atomic_cache = ["atomicwrites"]
 41 | nearley = ["js2py"]
 42 | regex = ["regex"]
 43 | 
 44 | [[package]]
 45 | name = "packaging"
 46 | version = "21.3"
 47 | description = "Core utilities for Python packages"
 48 | category = "dev"
 49 | optional = false
 50 | python-versions = ">=3.6"
 51 | 
 52 | [package.dependencies]
 53 | pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
 54 | 
 55 | [[package]]
 56 | name = "pluggy"
 57 | version = "1.0.0"
 58 | description = "plugin and hook calling mechanisms for python"
 59 | category = "dev"
 60 | optional = false
 61 | python-versions = ">=3.6"
 62 | 
 63 | [package.extras]
 64 | dev = ["pre-commit", "tox"]
 65 | testing = ["pytest", "pytest-benchmark"]
 66 | 
 67 | [[package]]
 68 | name = "py"
 69 | version = "1.11.0"
 70 | description = "library with cross-python path, ini-parsing, io, code, log facilities"
 71 | category = "dev"
 72 | optional = false
 73 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
 74 | 
 75 | [[package]]
 76 | name = "pyparsing"
 77 | version = "3.0.9"
 78 | description = "pyparsing module - Classes and methods to define and execute parsing grammars"
 79 | category = "dev"
 80 | optional = false
 81 | python-versions = ">=3.6.8"
 82 | 
 83 | [package.extras]
 84 | diagrams = ["railroad-diagrams", "jinja2"]
 85 | 
 86 | [[package]]
 87 | name = "pytest"
 88 | version = "7.1.3"
 89 | description = "pytest: simple powerful testing with Python"
 90 | category = "dev"
 91 | optional = false
 92 | python-versions = ">=3.7"
 93 | 
 94 | [package.dependencies]
 95 | attrs = ">=19.2.0"
 96 | colorama = {version = "*", markers = "sys_platform == \"win32\""}
 97 | iniconfig = "*"
 98 | packaging = "*"
 99 | pluggy = ">=0.12,<2.0"
100 | py = ">=1.8.2"
101 | tomli = ">=1.0.0"
102 | 
103 | [package.extras]
104 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
105 | 
106 | [[package]]
107 | name = "tomli"
108 | version = "2.0.1"
109 | description = "A lil' TOML parser"
110 | category = "dev"
111 | optional = false
112 | python-versions = ">=3.7"
113 | 
114 | [metadata]
115 | lock-version = "1.1"
116 | python-versions = "^3.9"
117 | content-hash = "1c0ded8725800f44c4524e92064d3f29fc3ebc8c6ae2425251f09407b5ef97f1"
118 | 
119 | [metadata.files]
120 | attrs = []
121 | colorama = [
122 |     {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"},
123 |     {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"},
124 | ]
125 | iniconfig = [
126 |     {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
127 |     {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
128 | ]
129 | lark = [
130 |     {file = "lark-1.1.2-py2.py3-none-any.whl", hash = "sha256:c1ab213fc5e2d273fe2d91da218ccc8b5b92d065b17faa5e743499cb16594b7d"},
131 |     {file = "lark-1.1.2.tar.gz", hash = "sha256:7a8d0c07d663da9391d7faee1bf1d7df4998c47ca43a593cbef5c7566acd057a"},
132 | ]
133 | packaging = [
134 |     {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
135 |     {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
136 | ]
137 | pluggy = [
138 |     {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
139 |     {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
140 | ]
141 | py = [
142 |     {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
143 |     {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
144 | ]
145 | pyparsing = [
146 |     {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
147 |     {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
148 | ]
149 | pytest = []
150 | tomli = [
151 |     {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
152 |     {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
153 | ]
154 | 


--------------------------------------------------------------------------------