├── qslang
    ├── py.typed
    ├── __main__.py
    ├── __about__.py
    ├── __init__.py
    ├── preprocess.py
    ├── config.py
    ├── filter.py
    ├── util.py
    ├── igroupby.py
    ├── avg_times.py
    ├── dose.py
    ├── event.py
    ├── pharmacokinetics.py
    ├── load.py
    ├── parsimonious.py
    └── main.py
├── data
    ├── private
    │   └── .empty
    └── test
    │   ├── notes
    │       └── 2022-08-10.md
    │   └── testnote.enex
├── .gitignore
├── .gitmodules
├── tests
    ├── test_util.py
    ├── test_dose.py
    └── test_parse.py
├── test-integration.sh
├── config.example.toml
├── Makefile
├── pyproject.toml
├── .github
    └── workflows
    │   └── build.yml
└── README.md


/qslang/py.typed:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data/private/.empty:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/qslang/__main__.py:
--------------------------------------------------------------------------------
1 | from qslang.main import main
2 | 
3 | if __name__ == "__main__":
4 |     main()
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | data/private
2 | data/generated
3 | .*cache
4 | __pycache__
5 | config.toml
6 | *.egg-info
7 | *coverage*
8 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "thirdparty/evernote-dump"]
2 | 	path = thirdparty/evernote-dump
3 | 	url = https://github.com/ErikBjare/evernote-dump.git
4 | 


--------------------------------------------------------------------------------
/qslang/__about__.py:
--------------------------------------------------------------------------------
1 | __title__ = "QSlang"
2 | __version__ = "0.1"
3 | __summary__ = "A text-based language for manual entry of quantified self data"
4 | __author__ = "Erik Bjäreholt"
5 | __email__ = "erik@bjareho.lt"
6 | __uri__ = "https://github.com/ErikBjare/QSlang"
7 | 


--------------------------------------------------------------------------------
/qslang/__init__.py:
--------------------------------------------------------------------------------
1 | from qslang.dose import Dose
2 | from qslang.event import Event, print_events
3 | from qslang.load import load_events
4 | from qslang.filter import filter_events
5 | 
6 | __all__ = ["Dose", "Event", "print_events", "load_events", "filter_events"]
7 | 


--------------------------------------------------------------------------------
/tests/test_util.py:
--------------------------------------------------------------------------------
 1 | #!/bin/env python3
 2 | 
 3 | from qslang.util import dayrange, monthrange
 4 | 
 5 | 
 6 | def test_monthrange():
 7 |     months = monthrange((2017, 1), (2018, 4))
 8 |     assert len(months) == 12 + 4
 9 | 
10 | 
11 | def test_dayrange():
12 |     days = dayrange((2017, 12, 20), (2017, 12, 31))
13 |     assert len(days) == 12
14 | 
15 |     days = dayrange((2017, 12, 20), (2018, 2, 4))
16 |     assert len(days) == 12 + 31 + 4
17 | 


--------------------------------------------------------------------------------
/data/test/notes/2022-08-10.md:
--------------------------------------------------------------------------------
 1 | # 2022-08-10
 2 | 
 3 | 01:00 - This day is just a test example
 4 | 
 5 | 08:30 - 1 cup Coffee
 6 | 
 7 | 09:00 - 3g Creatine monohydrate + 1g Fish Oil (360mg DHA + 240mg EPA) + 5mg Zinc (from picolinate)
 8 | 
 9 | 10:00 - Having a workout
10 | 
11 | 10:00 - 100mg Caffeine
12 | 
13 | 17:30 - Going to dinner with friends
14 | 
15 | ~18:00 - 33cl Beer (5.2%)
16 | 
17 | 19:30 - 1.5dl Wine (12.1%)
18 | 
19 | 22:00 - 500mg Magnesium bisglycinate
20 | 
21 | 23:30 - 0.5mg Melatonin subl
22 | 
23 | +00:10 - Having a hard time sleeping, to demonstrate the (+time) syntax for entries into the next day
24 | 


--------------------------------------------------------------------------------
/test-integration.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Smoke test for the QSlang CLI: runs each subcommand once with the --testing flag.
 3 | 
 4 | # abort as soon as any command exits non-zero
 5 | set -e
 6 | 
 7 | # echo each command before it is executed
 8 | set -x
 9 | 
10 | # use matplotlib's non-interactive Agg backend so the plot commands open no windows
11 | export MPLBACKEND=Agg
12 | # flags passed to every invocation below (left unquoted there on purpose)
13 | FLAGS="--testing"
14 | 
15 | # print help
16 | qslang $FLAGS --help
17 | 
18 | # list all substances
19 | qslang $FLAGS substances
20 | 
21 | # print the per-substance dose summary
22 | qslang $FLAGS summary --substances caffeine
23 | 
24 | # list dose events
25 | qslang $FLAGS events --substances caffeine
26 | 
27 | # print effect spans for a common substance
28 | qslang $FLAGS effectspan --substances caffeine
29 | 
30 | # one run of each plot subcommand
31 | qslang $FLAGS plot --substances caffeine
32 | qslang $FLAGS plot-calendar --substances caffeine
33 | qslang $FLAGS plot-effectspan --substances caffeine
34 | 
35 | # older invocations: the first repeats the plain `plot` call above, the others add flags
36 | qslang $FLAGS plot --substances caffeine
37 | qslang $FLAGS plot --count --substances caffeine
38 | qslang $FLAGS plot --days --substances caffeine
39 | 


--------------------------------------------------------------------------------
/qslang/preprocess.py:
--------------------------------------------------------------------------------
 1 | from qslang.event import Event
 2 | 
 3 | _alcohol_conc_assumptions = {  # fallback alcohol fraction (0-1), keyed by lowercase substance
 4 |     "gin": 0.4,
 5 |     "vodka": 0.4,
 6 |     "whiskey": 0.4,
 7 |     "beer": 0.05,
 8 |     "wine": 0.12,
 9 | }
10 | 
11 | 
12 | def _alcohol_preprocess(events: list[Event]) -> list[Event]:
13 |     for e in events:
14 |         if not e.substance or not e.dose:
15 |             continue
16 |         if "Alcohol" in e.tags:
17 |             conc_str = e.data.get("concentration", None)
18 |             if conc_str:
19 |                 if "?" not in conc_str:
20 |                     conc = 0.01 * float(conc_str.strip("%"))
21 |             elif e.substance.lower() in _alcohol_conc_assumptions:
22 |                 conc = _alcohol_conc_assumptions[e.substance.lower()]
23 |             else:
24 |                 print(f"Concentration unknown for event: {e}")
25 |                 continue
26 |             e.data["substance"] = "Alcohol"
27 |             e.data["amount"] = str(e.dose.quantity * conc)
28 |     return events
29 | 


--------------------------------------------------------------------------------
/data/test/testnote.enex:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">
 3 | <en-export export-date="20120727T073610Z" application="Evernote" version="Evernote Mac 3.0.5 (209942)">
 4 | <note><title>Vim Tips</title><content><![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>
 5 | <!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
 6 | <en-note style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;">
 7 | yank for copy, delete for cut, put for parse
 8 | <div><br/></div>
 9 | <div>Move in context, not position</div>
10 | <div>/ search forward</div>
11 | <div>? search backward</div>
12 | <div>n repeat last search</div>
13 | <div>N repeat last search but in the opposite direction</div>
14 | <div>tx move to 'x'</div>
15 | <div>fx find 'x'</div>
16 | </en-note>
17 | ]]></content><created>20101229T161500Z</created><updated>20101231T161039Z</updated><note-attributes/></note>
18 | </en-export>
19 | 


--------------------------------------------------------------------------------
/config.example.toml:
--------------------------------------------------------------------------------
 1 | # Categories are used to group similar substances together.
 2 | [categories]
 3 | stimulants = ["Coffee", "Caffeine", "Modafinil"]
 4 | sleepaids = ["Melatonin", "5-HTP", "Magnesium", "L-Theanine"]  # Gentle sleep aids
 5 | alcohol = ["Beer", "Wine", "Drink", "Vodka", "Gin", "Whiskey", "Cider"]
 6 | supplements = ["Creatine monohydrate", "Fish Oil", "Magnesium bisglycinate", "Zinc"]
 7 | 
 8 | # Aliases can be used to substitute synonyms,
 9 | # like shorthands/misspellings into their proper name,
10 | # or to treat similar things as the same thing.
11 | [aliases]
12 | "Creatine monohydrate" = ["Creatine"]
13 | 
14 | # Durations are used to get an approximate duration of the effects of a substance.
15 | # Given here in hours.
16 | [durations]
17 | "caffeine" = 4
18 | "modafinil" = 12
19 | 
20 | # This is where you specify the location of logs to load.
21 | [data]
22 | standardnotes = "data/test/notes"   # folder-based using snfs or standardnotes plaintext backup folder
23 | #standardnotes_export = ""          # unencrypted standardnotes export txt (actually JSON) file
24 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | SRCFILES=qslang/*.py tests/*.py
 2 | 
 3 | run:  # start the CLI through the package entry point; there is no top-level main.py
 4 | 	poetry run python3 -m qslang
 5 | 
 6 | test:
 7 | 	poetry run pytest --cov=qslang
 8 | 
 9 | test-integration:
10 | 	poetry run ./test-integration.sh
11 | 
12 | data/generated/effectspan-caffeine.csv:
13 | 	poetry run python3 -m qslang effectspan --substances caffeine > $@
14 | 
15 | data/generated/effectspan-cannabis.csv:
16 | 	# TODO: the 'cannabis oil' part doesn't work
17 | 	poetry run python3 -m qslang effectspan --substances 'weed,hash,cannabis oil' --normalize 'weed' > $@
18 | 
19 | typecheck:
20 | 	poetry run mypy --ignore-missing-import --check-untyped-defs ${SRCFILES}
21 | 
22 | format:
23 | 	poetry run black qslang tests
24 | 
25 | pyupgrade:
26 | 	poetry run pyupgrade --py310-plus ${SRCFILES}
27 | 
28 | no_implicit_optional:
29 | 	poetry run no_implicit_optional --use-union-or .
30 | 
31 | precommit:
32 | 	make format
33 | 	make pyupgrade
34 | 	make typecheck
35 | 
36 | data/private/Evernote:
37 | 	cd thirdparty/evernote-dump/source/ && \
38 | 		python run_script.py ../../../data/private/Evernote.enex && \
39 | 		mv Evernote/ ../../../data/private
40 | 
41 | 


--------------------------------------------------------------------------------
/tests/test_dose.py:
--------------------------------------------------------------------------------
 1 | from qslang.dose import Dose
 2 | 
 3 | 
 4 | def test_dose_eq():
 5 |     assert Dose("", "100ml") == Dose("", "0.1l")
 6 | 
 7 | 
 8 | def test_add_dose():
 9 |     assert Dose("caffeine", "100mg") + Dose("caffeine", "100mg")
10 | 
11 |     assert Dose("", "0g") + Dose("", "1g") == Dose("", "1.0g")
12 |     assert Dose("", "1mg") + Dose("", "10mg") == Dose("", "11.0mg")
13 |     assert Dose("", "500mcg") + Dose("", "1mg") == Dose("", "1.5mg")
14 |     assert Dose("", "100mcg") + Dose("", "100ug") == Dose("", "200.0ug")
15 |     assert Dose("", "100mcg") + Dose("", "100μg") == Dose("", "200.0ug")
16 | 
17 |     assert Dose("", "1ml") + Dose("", "2ml") == Dose("", "3.0ml")
18 |     assert Dose("", "1dl") + Dose("", "4dl") == Dose("", "500.0ml")
19 |     assert Dose("", "1.0dl") + Dose("", "0l") == Dose("", "100.0ml")
20 | 
21 |     assert Dose("", "33cl") + Dose("", "1l") == Dose("", "1.33l")
22 | 
23 | 
24 | def test_dose_format():
25 |     d = Dose("Caffeine", "0.1g")
26 |     assert str(d) == "100mg Caffeine"
27 | 
28 |     d = Dose("Potent stuff", "100mcg")
29 |     assert str(d) == "100mcg Potent stuff"
30 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "qslang"
 3 | version = "0.1.0"
 4 | description = ""
 5 | authors = ["Erik Bjäreholt <erik@bjareho.lt>"]
 6 | license = "MPL-2.0"
 7 | packages = [
 8 |     { include = "qslang" }
 9 | ]
10 | 
11 | [tool.poetry.scripts]
12 | qslang = "qslang.main:main"
13 | 
14 | [tool.poetry.dependencies]
15 | python = "^3.10"
16 | aw-core = "*"
17 | 
18 | numpy = "*"
19 | matplotlib = "*"
20 | pandas = "*"
21 | calplot = "^0.1"  # fork of calmap
22 | 
23 | click = "*"
24 | pint = "*"
25 | toml = "*"
26 | 
27 | regex = "^2022.6.2"
28 | parsimonious = "^0.9.0"
29 | 
30 | [tool.poetry.group.dev.dependencies]
31 | pytest = "*"
32 | pytest-cov = "*"
33 | pytest-ordering = "*"
34 | mypy = "*"
35 | flake8 = "*"
36 | black = "*"
37 | types-toml = "^0.10.8"
38 | pyupgrade = "^3.3.1"
39 | 
40 | [tool.pytest.ini_options]
41 | minversion = "6.0"
42 | addopts = "--cov=qslang --cov-report=xml --cov-report=html --cov-report=term" # --profile --cov-report=term
43 | testpaths = [
44 |     "qslang",
45 |     "tests",
46 | ]
47 | python_files = ["*.py",]
48 | filterwarnings = ["ignore::DeprecationWarning",]
49 | 
50 | [build-system]
51 | requires = ["poetry-core"]
52 | build-backend = "poetry.core.masonry.api"
53 | 


--------------------------------------------------------------------------------
/qslang/config.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from pathlib import Path
 3 | 
 4 | import toml
 5 | 
 6 | logger = logging.getLogger(__name__)
 7 | 
 8 | rootdir = Path(__file__).resolve().parent.parent
 9 | homedir = Path.home()
10 | configdir = homedir / ".config" / "qslang"
11 | 
12 | _testing = False
13 | _config = None
14 | 
15 | 
16 | def set_global_testing():
17 |     logger.info("Setting global testing flag")
18 |     global _testing
19 |     _testing = True
20 | 
21 | 
22 | def load_config(testing=False):
23 |     global _testing
24 |     global _config
25 | 
26 |     testing = testing or _testing
27 |     if _config:
28 |         return _config
29 | 
30 |     filepath = None
31 |     for path in (configdir, rootdir):
32 |         path = path / "config.toml"
33 |         if path.exists():
34 |             filepath = path
35 | 
36 |     if not filepath or testing:
37 |         if not filepath:
38 |             logger.warning("No config found, falling back to example config")
39 |         if testing:
40 |             logger.info("Using example config for testing")
41 |         filepath = rootdir / "config.example.toml"
42 | 
43 |     logger.info(f"Using config file at {filepath}")
44 |     with open(filepath) as f:
45 |         config = toml.load(f)
46 |     _config = config
47 |     return config
48 | 
49 | 
50 | if __name__ == "__main__":
51 |     print(rootdir)
52 |     print(load_config())
53 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: Build
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   tests:
11 |     runs-on: ubuntu-latest
12 |     strategy:
13 |       matrix:
14 |         python-version: ['3.10']
15 | 
16 |     steps:
17 |     - uses: actions/checkout@v2
18 |       with:
19 |         submodules: 'recursive'
20 | 
21 |     - name: Set up Python
22 |       uses: actions/setup-python@v1
23 |       with:
24 |         python-version: ${{ matrix.python-version }}
25 | 
26 |     - name: Install
27 |       run: |
28 |         pip install poetry
29 |         poetry install
30 | 
31 |     - name: Run unit tests
32 |       run: |
33 |         make test
34 |         bash <(curl -s https://codecov.io/bash)
35 | 
36 |     - name: Run integration tests
37 |       run: |
38 |         make test-integration
39 | 
40 |   typecheck:
41 |     runs-on: ubuntu-latest
42 |     strategy:
43 |       matrix:
44 |         python-version: ['3.10']
45 | 
46 |     steps:
47 |     - uses: actions/checkout@v2
48 |       with:
49 |         submodules: 'recursive'
50 | 
51 |     - name: Set up Python
52 |       uses: actions/setup-python@v1
53 |       with:
54 |         python-version: ${{ matrix.python-version }}
55 | 
56 |     - name: Install
57 |       run: |
58 |         pip install poetry
59 |         poetry install
60 | 
61 |     - name: Typecheck
62 |       run: |
63 |         make typecheck
64 | 


--------------------------------------------------------------------------------
/qslang/filter.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | 
 3 | from qslang import Event
 4 | 
 5 | 
 6 | def filter_events_by_args(events: list[Event], args: list[str]) -> list[Event]:
 7 |     if not args:
 8 |         raise ValueError("Missing argument")
 9 | 
10 |     matches = []
11 |     for e in events:
12 |         for arg in args:
13 |             if (e.substance and e.substance.lower() == arg.lower()) or (
14 |                 arg[0] == "#"
15 |                 and arg.strip("#").lower() in set(map(lambda t: t.lower(), e.tags))
16 |             ):
17 |                 matches.append(e)
18 |                 break
19 |     return matches
20 | 
21 | 
22 | def filter_events(events, start=None, end=None, substances=[]):
23 |     if start:
24 |         events = [e for e in events if e.timestamp >= start]
25 |     if end:
26 |         events = [e for e in events if e.timestamp <= end]
27 |     if substances:
28 |         events = filter_events_by_args(events, substances)
29 |     return events
30 | 
31 | 
32 | def test_filter_events_by_args() -> None:
33 |     events = [
34 |         Event(datetime.now(), "dose", {"substance": "test"}),
35 |         Event(datetime.now(), "dose", {"substance": "test2"}),
36 |     ]
37 |     res = filter_events_by_args(events, ["test"])
38 |     assert len(res) == 1
39 | 
40 | 
41 | def test_filter_subst_with_space() -> None:
42 |     events = [
43 |         Event(datetime.now(), "dose", {"substance": "cannabis oil"}),
44 |     ]
45 |     res = filter_events_by_args(events, ["cannabis oil"])
46 |     assert len(res) == 1
47 | 


--------------------------------------------------------------------------------
/qslang/util.py:
--------------------------------------------------------------------------------
 1 | #!/bin/env python3
 2 | 
 3 | import logging
 4 | import itertools
 5 | import calendar
 6 | from collections import defaultdict
 7 | 
 8 | 
 9 | log = logging.getLogger(__name__)
10 | 
11 | 
12 | class MsgCounterHandler(logging.Handler):
13 |     """https://stackoverflow.com/a/31142078/965332"""
14 | 
15 |     level2count: dict[str, int]
16 | 
17 |     def __init__(self, *args, **kwargs) -> None:
18 |         super().__init__(*args, **kwargs)
19 |         self.level2count = defaultdict(int)
20 | 
21 |     def emit(self, record) -> None:
22 |         self.level2count[record.levelname] += 1
23 | 
24 | 
25 | def days_in_month(year, month):
26 |     return max(list(calendar.Calendar().itermonthdays(year, month)))
27 | 
28 | 
29 | def monthrange(
30 |     min_date: tuple[int, int], max_date: tuple[int, int]
31 | ) -> list[tuple[int, int]]:
32 |     (min_year, min_month) = min_date
33 |     (max_year, max_month) = max_date
34 |     g = list(itertools.product(range(min_year, max_year + 1), range(1, 13)))
35 |     g = list(itertools.dropwhile(lambda t: t < (min_year, min_month), g))
36 |     return list(itertools.takewhile(lambda t: t <= (max_year, max_month), g))
37 | 
38 | 
39 | def dayrange(
40 |     min_date: tuple[int, int, int], max_date: tuple[int, int, int]
41 | ) -> list[tuple[int, int, int]]:
42 |     months = monthrange(min_date[:2], max_date[:2])
43 |     return [
44 |         (y, m, d)
45 |         for y, m in months
46 |         for d in range(1, days_in_month(y, m) + 1)
47 |         if min_date <= (y, m, d) <= max_date
48 |     ]
49 | 


--------------------------------------------------------------------------------
/qslang/igroupby.py:
--------------------------------------------------------------------------------
 1 | from typing import TypeVar, overload, no_type_check
 2 | from collections.abc import Callable
 3 | from itertools import groupby
 4 | 
 5 | T = TypeVar("T")
 6 | K = TypeVar("K")
 7 | 
 8 | 
 9 | # Overload necessary here, see issue: https://github.com/python/mypy/issues/5464
10 | @overload
11 | def igroupby(l: list[T]) -> dict[K, list[K]]:
12 |     ...
13 | 
14 | 
15 | @overload  # noqa: F811
16 | def igroupby(l: list[T], key: Callable[[T], K]) -> dict[K, list[T]]:
17 |     ...
18 | 
19 | 
20 | def igroupby(l, key=lambda x: x):  # noqa: F811
21 |     return {k: list(v) for k, v in groupby(sorted(l, key=key), key=key)}
22 | 
23 | 
24 | def test_igroupby() -> None:
25 |     d0: dict[int, list[int]] = igroupby([1, 1, 2])
26 |     assert len(d0[1]) == 2
27 |     assert len(d0[2]) == 1
28 | 
29 |     d1: dict[int, list[float]] = igroupby([1.2, 1.5, 2], key=lambda x: round(x))
30 |     assert len(d1[1]) == 1
31 |     assert len(d1[2]) == 2
32 | 
33 |     d2: dict[int, list[int]] = igroupby([1, 1, 2], key=lambda x: round(x))
34 |     assert len(d2[1]) == 2
35 |     assert len(d2[2]) == 1
36 | 
37 |     t0: dict[int, list[int]] = igroupby([1, 2])  # noqa: F841
38 |     t1: dict[str, list[int]] = igroupby([1, 2], key=lambda x: str(x))  # noqa: F841
39 | 
40 | 
41 | @no_type_check
42 | def invalid_used_types() -> None:
43 |     """Hold assignments whose annotations do not match the igroupby overloads.
44 | 
45 |     Exists only to confirm the overloads work: comment out the decorator
46 |     above to see the type-checker errors.
47 |     """
48 |     t0: dict[str, list[int]] = igroupby([1, 2])  # noqa: F841
49 |     t1: dict[int, list[int]] = igroupby([1, 2], key=lambda x: str(x))  # noqa: F841
47 | 


--------------------------------------------------------------------------------
/qslang/avg_times.py:
--------------------------------------------------------------------------------
 1 | # From: https://stackoverflow.com/a/44463260/965332
 2 | 
 3 | import math
 4 | from cmath import phase, rect
 5 | from datetime import datetime, time
 6 | from math import degrees, radians
 7 | 
 8 | import numpy
 9 | 
10 | 
11 | def time_to_radians(time_of_day: time) -> float:
12 |     # radians are calculated using a 24-hour circle, not 12-hour, starting at north and moving clockwise
13 |     seconds_from_midnight = (
14 |         3600 * time_of_day.hour + 60 * time_of_day.minute + time_of_day.second
15 |     )
16 |     radians = float(seconds_from_midnight) / float(12 * 60 * 60) * 2.0 * math.pi
17 |     return radians
18 | 
19 | 
20 | def average_angle(angles: list[float]) -> float:
21 |     # angles measured in radians
22 |     x_sum = numpy.sum([math.sin(x) for x in angles])
23 |     y_sum = numpy.sum([math.cos(x) for x in angles])
24 |     x_mean = x_sum / float(len(angles))
25 |     y_mean = y_sum / float(len(angles))
26 |     return numpy.arctan2(x_mean, y_mean)
27 | 
28 | 
29 | def radians_to_time_of_day(x: float) -> time:
30 |     # radians are measured clockwise from north and represent time in a 24-hour circle
31 |     seconds_from_midnight = int(float(x) / (2.0 * math.pi) * 12.0 * 60.0 * 60.0)
32 |     hour = seconds_from_midnight // 3600 % 24
33 |     minute = (seconds_from_midnight % 3600) // 60
34 |     second = seconds_from_midnight % 60
35 |     return time(hour, minute, second)
36 | 
37 | 
38 | # Based on: https://rosettacode.org/wiki/Averages/Mean_time_of_day#Python
39 | 
40 | 
41 | def mean_angle(deg):
42 |     return degrees(phase(sum(rect(1, radians(d)) for d in deg) / len(deg)))
43 | 
44 | 
45 | def mean_time(times: list[time | datetime]) -> time:
46 |     seconds = (
47 |         (float(t.second) + int(t.minute) * 60 + int(t.hour) * 3600) for t in times
48 |     )
49 |     day = 24 * 60 * 60
50 |     to_angles = [s * 360.0 / day for s in seconds]
51 |     mean_as_angle = mean_angle(to_angles)
52 |     mean_seconds = mean_as_angle * day / 360.0
53 |     if mean_seconds < 0:
54 |         mean_seconds += day
55 |     h, m = divmod(mean_seconds, 3600)
56 |     m, s = divmod(m, 60)
57 |     if h == 24:
58 |         h = 0
59 |     return time(int(h), int(m), int(s))
60 | 
61 | 
62 | def test_mean_time():
63 |     t = mean_time([datetime(2017, 6, 9, 0, 10), datetime(2017, 6, 9, 0, 20)])
64 |     assert time(0, 14, 59) <= t <= time(0, 15)
65 | 
66 |     t = mean_time([datetime(2017, 6, 9, 23, 50), datetime(2017, 6, 9, 0, 10)])
67 |     assert t == time(0, 0)
68 | 
69 |     t = mean_time([time(23, 0, 17), time(23, 40, 20), time(0, 12, 45), time(0, 17, 19)])
70 |     assert t == time(23, 47, 43)
71 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | QSlang
 2 | ======
 3 | 
 4 | [![Build](https://github.com/ErikBjare/QSlang/actions/workflows/build.yml/badge.svg)](https://github.com/ErikBjare/QSlang/actions/workflows/build.yml)
 5 | [![codecov](https://codecov.io/gh/ErikBjare/qslang/branch/master/graph/badge.svg)](https://codecov.io/gh/ErikBjare/qslang)
 6 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 7 | [![Typechecking: Mypy](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/)
 8 | 
 9 | A tool to parse and analyze drug logs, for science. 
10 | 
11 | Uses a domain-specific language for manual entry of drug doses and accompanying journal/plaintext entries in a flexible textual format, which can then be used to analyze use of drugs/supplements/pharmaceuticals. Input on phones/touch devices is surprisingly efficient when used with sentence-predicting keyboards (like Swiftkey etc).
12 | 
13 | Built with [parsimonious](https://github.com/erikrose/parsimonious) (to parse notes) and [pint](https://github.com/hgrecco/pint) (to handle units).
14 | 
15 | Pronounced: Q-Slang
16 | 
17 | 
18 | Installation
19 | ============
20 | 
21 | To install, simply run:
22 | 
23 | ```sh
24 | pip install git+https://github.com/ErikBjare/QSlang.git
25 | ```
26 | 
27 | You should now have a `qslang` command available, or if you don't have your PATH configured, you can run it with `python3 -m qslang`.
28 | 
29 | Usage
30 | =====
31 | 
32 | ```
33 | $ qslang --help
34 | Usage: qslang [OPTIONS] COMMAND [ARGS]...
35 | 
36 |   QSlang is a tool to parse and analyze dose logs, for science.
37 | 
38 | Options:
39 |   -v, --verbose
40 |   --testing      run with testing config & data
41 |   --help         Show this message and exit.
42 | 
43 | Commands:
44 |   effectspan       print effect spans
45 |   events           print list of all doses
46 |   plot             plot doses over time in a barchart
47 |   plot-calendar    plot doses in a calendar
48 |   plot-effectspan  plot effect spans in a barchart
49 |   plot-influence   plot percent of time spent under effects of a substance
50 |   substances       print list of substances
51 |   summary          print summary of doses for each substance
52 | ```
53 | 
54 | For setup & configuration, copy `config.example.toml` to `config.toml` and edit as appropriate.
55 | 
56 | QSlang can read data from:
57 | 
58 |  - Directory with plaintext-files (as created by [standardnotes-fs](https://github.com/tannercollin/standardnotes-fs))
59 |     - How to: Put your notes in a folder, or use standardnotes-fs (deprecated) to mount your notes in a directory. Set the `data.standardnotes` key to the folder path in config.
60 |  - Standard Notes export (unencrypted)
61 |     - How to: create an unencrypted export and unzip the `SN Archive.txt` file (keep its default name). Set the `data.standardnotes_export` key to the file path in config.
62 |  - Evernote (enex files)
63 |     - How to: export the notebooks you want to analyze as `.enex` file. Then put all the exported notebooks you want into `./data/private`. Then run `make data/private/Evernote` to extract the .enex into markdown files (which will be put into `data/private/Evernote/`).
64 | 
65 | Then run `qslang --help` to get further usage instructions.
66 | 
67 | Input format
68 | ============
69 | 
70 | This is the expected format of notes. I've tried to make the parser lenient and flexible, but might write a stricter one in the future to avoid ambiguous parsing.
71 | 
72 | Basic example:
73 | 
74 | ```
75 | # 2018-04-14
76 | 
77 | 07:01 - Woke up
78 | 
79 | 07:32 - 2000IU Vitamin D3 + 5g Creatine monohydrate + 200mg Magnesium (from citrate)
80 | 
81 | 08:10 - ~2dl Green tea + 10g Cocoa
82 | 
83 | 12:54 - ~2dl Green tea
84 | 
85 | 16:30 - Started working on QSlang
86 | ```
87 | 


--------------------------------------------------------------------------------
/qslang/dose.py:
--------------------------------------------------------------------------------
  1 | #!/bin/env python3
  2 | 
  3 | import logging
  4 | from typing import Any
  5 | 
  6 | import pint
  7 | 
  8 | log = logging.getLogger(__name__)
  9 | 
 10 | 
 11 | ureg = pint.UnitRegistry(
 12 |     preprocessors=[
 13 |         lambda s: s.replace("%", "percent"),
 14 |         lambda s: s.replace("%%", "permille"),
 15 |     ]
 16 | )
 17 | 
 18 | ureg.define("micro- = 10**-6 = mc- = μ-")
 19 | ureg.define("percent = 0.01 = %")
 20 | ureg.define("permille = 0.001 = %%")
 21 | 
 22 | 
 23 | ureg.define("cup = 2*dl")
 24 | 
 25 | # NOTE: Not sure if this is correct? But gets rid of the warnings...
 26 | ureg.define("x = count")
 27 | ureg.define("IU = x")  # for now
 28 | ureg.define("CFU = x")  # for now
 29 | ureg.define("unknown = x")  # for now
 30 | ureg.define("serving = x")  # for now
 31 | ureg.define("puff = x")  # for now
 32 | ureg.define("puffs = x")  # for now
 33 | ureg.define("hit = x")  # for now
 34 | ureg.define("hits = x")  # for now
 35 | 
 36 | ureg.define("B = 10**9 * x")  # for noting billions of CFU, for example
 37 | 
 38 | # The type here is because mypy doesn't like this dynamically created type
 39 | Q_: Any = ureg.Quantity
 40 | 
 41 | 
 42 | class Dose:
 43 |     def __init__(self, substance: str, amount: str | Q_) -> None:
 44 |         self.substance: str = substance
 45 |         if not isinstance(amount, ureg.Quantity):
 46 |             self.quantity = Q_(amount)
 47 |         else:
 48 |             self.quantity = amount
 49 | 
 50 |     def __str__(self) -> str:
 51 |         return f"{self.amount_with_unit} {self.substance}"
 52 | 
 53 |     @property
 54 |     def amount(self) -> float:
 55 |         # return the amount as a float, in the base unit (kg for mass, L for volumes)
 56 |         return self.quantity.to_base_units().magnitude
 57 | 
 58 |     @property
 59 |     def amount_with_unit(self) -> str:
 60 |         if not self.quantity.units:
 61 |             return str(round(self.quantity))
 62 |         q = self.quantity.to_compact()
 63 |         # print(q)
 64 |         amount = q.magnitude
 65 |         amount = round(amount) if round(amount, 8) % 1.0 == 0 else amount
 66 |         return f"{amount}{q.units:~P}"
 67 | 
 68 |     def __repr__(self):
 69 |         return f"<Dose {self}>"
 70 | 
 71 |     def __add__(self, other: "Dose") -> "Dose":
 72 |         if self.quantity.units.dimensionality != other.quantity.units.dimensionality:
 73 |             # if quantity of either is 0, we skip it
 74 |             if self.quantity.magnitude == 0:
 75 |                 return other
 76 |             if other.quantity.magnitude == 0:
 77 |                 return self
 78 |             raise ValueError(
 79 |                 f"Cannot add doses with different units: {self.quantity.units} and {other.quantity.units} (for {self} and {other})"
 80 |             )
 81 |         assert self.substance.lower() == other.substance.lower()
 82 |         return Dose(self.substance, self.quantity + other.quantity)
 83 | 
 84 |     def __truediv__(self, b):
 85 |         return Dose(self.substance, self.quantity / b)
 86 | 
 87 |     def __lt__(self, other):
 88 |         return self.quantity < other.quantity
 89 | 
 90 |     def __eq__(self, other):
 91 |         return (
 92 |             self.substance == other.substance
 93 |             and round((self.quantity - other.quantity).magnitude, 12) == 0
 94 |         )
 95 | 
 96 | 
 97 | def test_amount_with_unit():
 98 |     d = Dose("L", "100 mcg")
 99 |     assert d.amount_with_unit == "100mcg"
100 | 
101 | 
def test_amount_unitless():
    """A count given as ``10x`` is rendered back as ``10x``."""
    dose = Dose("Candy", "10x")
    rendered = dose.amount_with_unit
    assert rendered == "10x"
105 | 
106 | 
def test_amount_iu():
    """5000 IU is compacted to ``5kIU``."""
    dose = Dose("Vitamin D", "5000 IU")
    rendered = dose.amount_with_unit
    assert rendered == "5kIU"
110 | 
111 | 
def test_amount_cfu():
    """An amount given as ``7B`` is rendered back as ``7B``."""
    dose = Dose("CFU", "7B")
    rendered = dose.amount_with_unit
    assert rendered == "7B"
115 | 


--------------------------------------------------------------------------------
/qslang/event.py:
--------------------------------------------------------------------------------
  1 | #!/bin/env python3
  2 | 
  3 | import logging
  4 | import json
  5 | from copy import copy
  6 | from typing import Any, Literal
  7 | from collections.abc import Hashable
  8 | from datetime import datetime
  9 | 
 10 | from dataclasses import dataclass, field
 11 | 
 12 | from .dose import Dose
 13 | 
 14 | 
 15 | log = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | def _freeze(obj: Any) -> Any:
 19 |     if isinstance(obj, Hashable):
 20 |         return obj
 21 |     elif isinstance(obj, datetime):
 22 |         return obj.isoformat()
 23 |     elif isinstance(obj, list):
 24 |         return tuple(_freeze(x) for x in obj)
 25 |     elif isinstance(obj, dict):
 26 |         return tuple((k, _freeze(v)) for k, v in obj.items())
 27 |     else:
 28 |         raise ValueError("Cannot freeze object of type %s" % type(obj))
 29 | 
 30 | 
 31 | @dataclass(order=True)
 32 | class Event:
 33 |     timestamp: datetime
 34 |     type: Literal["dose"] | Literal["journal"]
 35 |     data: dict = field(compare=False)
 36 | 
 37 |     def __hash__(self):
 38 |         return hash((self.timestamp, self.type, _freeze(self.data)))
 39 | 
 40 |     @property
 41 |     def tags(self) -> list[str]:
 42 |         return self.data["tags"] if "tags" in self.data else []
 43 | 
 44 |     @property
 45 |     def substance(self) -> str | None:
 46 |         return self.data["substance"] if "substance" in self.data else None
 47 | 
 48 |     @property
 49 |     def dose(self) -> Dose | None:
 50 |         if self.type == "dose":
 51 |             try:
 52 |                 assert self.substance
 53 |                 # NOTE: Amount could be None, if specified as unknown ("?") in entry
 54 |                 return Dose(self.substance, self.amount or 0)
 55 |             except Exception as e:
 56 |                 print(self.data)
 57 |                 log.warning(f"Unable to build Dose object: {e}")
 58 |                 return None
 59 |         else:
 60 |             return None
 61 | 
 62 |     @property
 63 |     def amount(self) -> float | None:
 64 |         """Returns the amount with unit, or None"""
 65 |         try:
 66 |             assert "dose" in self.data
 67 |             assert "amount" in self.data["dose"]
 68 |             amount = self.data["dose"]["amount"]
 69 |             assert amount != "unknown"
 70 |             return str(amount) + self.data["dose"]["unit"]
 71 |         except AssertionError:
 72 |             return None
 73 | 
 74 |     @property
 75 |     def roa(self) -> str:
 76 |         try:
 77 |             assert "dose" in self.data
 78 |             assert "roa" in self.data["dose"]
 79 |             return self.data["dose"]["roa"]
 80 |         except AssertionError:
 81 |             return "unknown"
 82 | 
 83 |     def prettyprint(self, show_misc=False) -> None:
 84 |         if self.type == "dose" and "amount" in self.data and "substance" in self.data:
 85 |             d = self.data
 86 |             misc = copy(self.data)
 87 |             misc.pop("amount")
 88 |             misc.pop("substance")
 89 |             if self.dose:
 90 |                 base_str = str(self.dose)
 91 |             else:
 92 |                 base_str = f"{d['amount'] if 'amount' in d else '?'} {d['substance']}"
 93 |             misc_str = f"  -  {misc}" if show_misc else ""
 94 |             e_str = base_str + misc_str
 95 |         else:
 96 |             e_str = str(self.data)
 97 |         print(f"{self.timestamp.isoformat()} | {self.type.ljust(7)} | " + e_str)
 98 | 
 99 |     @property
100 |     def json_dict(self) -> dict[str, Any]:
101 |         return {"timestamp": self.timestamp, "type": self.type, "data": self.data}
102 | 
103 |     @property
104 |     def json_str(self) -> str:
105 |         return json.dumps(self.json_dict)
106 | 
107 | 
def print_events(events: list[Event]) -> None:
    """Pretty-print each event, emitting a separator line when the date changes.

    The separator is printed before the first event of a new date and shows
    the date of the previous event.
    """
    previous: datetime | None = None
    for event in events:
        if previous is not None and previous.date() != event.timestamp.date():
            separator = f"{str(previous.date()).ljust(8)} =========|=========|====== New day ====="
            print(separator)
        event.prettyprint()
        previous = event.timestamp
117 | 


--------------------------------------------------------------------------------
/qslang/pharmacokinetics.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from collections import defaultdict
  3 | from datetime import datetime, timedelta, timezone
  4 | 
  5 | import matplotlib.pyplot as plt
  6 | import numpy as np
  7 | import pandas as pd
  8 | from aw_core import Event
  9 | 
 10 | from . import Dose
 11 | from .config import load_config
 12 | 
 13 | logger = logging.getLogger(__name__)
 14 | 
 15 | 
 16 | def compute_plasma(doses: list[tuple[datetime, Dose]]):
 17 |     # https://pharmacy.ufl.edu/files/2013/01/5127-28-equations.pdf
 18 |     absorbtion_halflife = timedelta(minutes=30)
 19 |     halflife = timedelta(minutes=4 * 60)
 20 |     idx = pd.date_range(
 21 |         start=doses[0][0],
 22 |         end=doses[-1][0] + timedelta(hours=12),
 23 |         freq=timedelta(minutes=30),
 24 |     )
 25 |     df = pd.DataFrame(index=idx)
 26 |     df["administered"] = 0.0
 27 | 
 28 |     for dt, dose in doses:
 29 |         print(dose.quantity)
 30 |         df.at[dt, "administered"] = dose.quantity.magnitude
 31 | 
 32 |     df["C"] = 0.0
 33 |     df["unabsorbed"] = df["administered"]
 34 |     for i in df.index[1:]:
 35 |         stepsize = i.freq / pd.Timedelta(halflife)
 36 |         stepsize_abs = i.freq / pd.Timedelta(absorbtion_halflife)
 37 |         df.loc[i, "unabsorbed"] += df.loc[i - i.freq, "unabsorbed"] * np.exp2(
 38 |             -1 * stepsize_abs
 39 |         )
 40 |         df.loc[i, "C"] += df.loc[i - i.freq, "C"] * np.exp2(-stepsize) + df.loc[
 41 |             i - i.freq, "unabsorbed"
 42 |         ] * (1 - np.exp2(-stepsize_abs))
 43 | 
 44 |     df["C"].plot(label="plasma")
 45 |     df["unabsorbed"].plot(label="unabsorbed")
 46 |     plt.legend()
 47 |     plt.show()
 48 | 
 49 |     print(df)
 50 | 
 51 | 
 52 | def effectspan_substance(doses: list[tuple[datetime, Dose]]) -> list[Event]:
 53 |     """
 54 |     Given a list of doses for a particular substance, return a list of events
 55 |     spanning the time during which the substance was active (according to durations specified in a dictionary).
 56 |     """
 57 |     subst = doses[0][1].substance.lower()
 58 |     subst_durations = {
 59 |         k: timedelta(hours=v) for k, v in load_config().get("durations", {}).items()
 60 |     }
 61 | 
 62 |     # TODO: Incorporate time-until-effect into the calculation
 63 |     # assert all doses of same substance
 64 |     assert all(dose.substance.lower() == subst for (_, dose) in doses)
 65 | 
 66 |     # assert we have duration data for the substance
 67 |     if subst not in subst_durations:
 68 |         raise ValueError(f"Unknown effect duration for substance: {subst}")
 69 | 
 70 |     # sort
 71 |     doses = sorted(doses, key=lambda x: x[0])
 72 | 
 73 |     # compute effectspan for each dose, merge overlaps
 74 |     events: list[Event] = []
 75 |     for dt, dose in doses:
 76 |         end = dt + subst_durations[subst]
 77 | 
 78 |         # checks if last event overlaps with dose, if so, extend it
 79 |         if len(events) > 0:
 80 |             last_event = events[-1]
 81 |             # if last event ends before dose starts
 82 |             if (last_event.timestamp + last_event.duration) > dt:
 83 |                 # events overlap
 84 |                 last_event.duration = end - last_event.timestamp
 85 |                 last_event.data["doses"].append(dose)
 86 |                 continue
 87 | 
 88 |         e = Event(
 89 |             timestamp=dt,
 90 |             duration=subst_durations[subst],
 91 |             data={"substance": subst, "doses": [dose]},
 92 |         )
 93 |         events.append(e)
 94 | 
 95 |     return events
 96 | 
 97 | 
 98 | def effectspan(doses: list[tuple[datetime, Dose]]) -> list[Event]:
 99 |     """
100 |     Given a list of doses, computes all spans of time during which the substance is active.
101 |     """
102 |     doses = sorted(doses, key=lambda x: x[0])
103 | 
104 |     # Group by substance
105 |     groups = defaultdict(list)
106 |     for dt, dose in doses:
107 |         groups[dose.substance].append((dt, dose))
108 | 
109 |     # Compute effectspan for each substance
110 |     events = []
111 |     for substance, doses in groups.items():
112 |         try:
113 |             events.extend(effectspan_substance(doses))
114 |         except ValueError as e:
115 |             logger.warning(f"Failed to compute effectspan for {substance}: {e}")
116 | 
117 |     return events
118 | 
119 | 
def example():
    """Run ``compute_plasma`` on two 50mg caffeine doses taken four hours apart."""
    morning = datetime(2018, 9, 10, 8, tzinfo=timezone.utc)
    noon = datetime(2018, 9, 10, 12, tzinfo=timezone.utc)
    compute_plasma(
        [
            (morning, Dose("Caffeine", "50mg")),
            (noon, Dose("Caffeine", "50mg")),
        ]
    )
126 | 
127 | 
def test_effectspan():
    """Compute effect spans for two caffeine and two cannabis doses and print them."""

    def at(hour: int, minute: int = 0) -> datetime:
        return datetime(2018, 9, 10, hour, minute, tzinfo=timezone.utc)

    doses = [
        (at(8), Dose("Caffeine", "75mg")),
        (at(12), Dose("Caffeine", "50mg")),
        (at(16, 20), Dose("Cannabis", "0.03g")),
        (at(19), Dose("Cannabis", "0.05g")),
    ]
    for span in effectspan(doses):
        print(span)
138 | 
139 | 
# Manual run: print the computed effect spans, then run the plasma example
# (which opens a matplotlib window via compute_plasma).
if __name__ == "__main__":
    test_effectspan()
    example()
143 | 


--------------------------------------------------------------------------------
/tests/test_parse.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | 
  3 | import pytest
  4 | from qslang.parsimonious import (
  5 |     Event,
  6 |     ParseError,
  7 |     _parse_continue_on_err,
  8 |     parse,
  9 |     parse_entries,
 10 |     parse_to_node,
 11 | )
 12 | 
 13 | # Tests parsing with visitor
 14 | 
 15 | 
 16 | def test_parse_notes():
 17 |     parsed = parse("09:00 - One journal entry\n\n10:00 - Another journal entry")
 18 |     assert len(parsed) == 2
 19 |     assert parsed[0].type == "journal"
 20 |     assert parsed[0].data == {"note": "One journal entry"}
 21 |     assert parsed[1].type == "journal"
 22 |     assert parsed[1].data == {"note": "Another journal entry"}
 23 | 
 24 | 
 25 | def test_parse_multidose():
 26 |     s = "09:00 - 100mg Caffeine + 200mg L-Theanine"
 27 |     assert parse(s)
 28 | 
 29 | 
 30 | def test_parse_multivit():
 31 |     s = "09:00 - 1x Multivitamin (100mg Magnesium (from Citrate) + 10mg Zinc (from Picolinate))"
 32 |     assert parse(s)
 33 | 
 34 | 
 35 | def test_parse_nested():
 36 |     s = "09:30 - 1x Something (2x Something-Else (10mg Substance-A + 10mg Substance-B) + 10mg Substance-C) oral"
 37 |     assert parse(s)
 38 | 
 39 | 
 40 | def test_parse_simple():
 41 |     print("Simple example, no day header")
 42 |     s = """09:30 - 1 cup Coffee oral"""
 43 |     parsed = parse(s)
 44 |     assert len(parsed) == 1
 45 |     assert parsed[0].timestamp == datetime(1900, 1, 1, 9, 30)
 46 |     assert parsed[0].data["substance"] == "Coffee"
 47 |     assert parsed[0].data["dose"] == {"amount": 1, "unit": "cup", "roa": "oral"}
 48 | 
 49 | 
 50 | def test_parse_header():
 51 |     print("\nHeader example")
 52 |     s = """
 53 |     # 2020-01-01
 54 | 
 55 |     09:30 - 1 cup Coffee
 56 |     """
 57 |     parsed = parse(s)
 58 |     assert len(parsed) == 1
 59 |     assert parsed[0].timestamp == datetime(2020, 1, 1, 9, 30)
 60 |     assert parsed[0].data["substance"] == "Coffee"
 61 | 
 62 | 
 63 | def test_parse_subdoses():
 64 |     print("\nSubdoses example")
 65 |     s = """
 66 |     09:30 - 1 cup Coffee (100mg Caffeine + 50mg L-Theanine)
 67 |     """
 68 |     parsed = parse(s)
 69 |     assert len(parsed) == 1
 70 |     assert parsed[0].timestamp == datetime(1900, 1, 1, 9, 30)
 71 |     assert parsed[0].data["substance"] == "Coffee"
 72 |     assert parsed[0].data.get("subdoses", []) == [
 73 |         {"substance": "Caffeine", "dose": {"amount": 100, "unit": "mg"}},
 74 |         {"substance": "L-Theanine", "dose": {"amount": 50, "unit": "mg"}},
 75 |     ]
 76 | 
 77 | 
 78 | def test_parse_complex():
 79 |     print("\nComplex example")
 80 |     # Complex example
 81 |     s = """
 82 |     09:30 - 1 cup Coffee (strong, milk, ~100mg Caffeine + 50mg L-Theanine + 1mg LOL)
 83 |     """
 84 |     parsed = parse(s)
 85 |     assert len(parsed) == 1
 86 |     assert parsed[0].timestamp == datetime(1900, 1, 1, 9, 30)
 87 |     assert parsed[0].data["substance"] == "Coffee"
 88 |     assert parsed[0].data.get("notes", []) == [
 89 |         {"note": "strong"},
 90 |         {"note": "milk"},
 91 |     ]
 92 |     assert parsed[0].data.get("subdoses", []) == [
 93 |         {
 94 |             "substance": "Caffeine",
 95 |             "dose": {"amount": 100, "unit": "mg", "approx": True},
 96 |         },
 97 |         {"substance": "L-Theanine", "dose": {"amount": 50, "unit": "mg"}},
 98 |         {"substance": "LOL", "dose": {"amount": 1, "unit": "mg"}},
 99 |     ]
100 | 
101 | 
def test_parse_event():
    """Parse a day header plus an entry with subdoses and print the result."""
    text = "# 2020-01-01\n09:30 - 1x Something (50mg Caffeine + 100mg L-Theanine)"
    events = parse(text)
    print(events)
105 | 
106 | 
def test_parse_alcohol():
    """A brand and a percentage in parentheses both end up as notes."""
    events = parse("# 2020-01-01\n18:30 - 4cl Gin (Tanqueray, 47%)")
    assert len(events) == 1
    notes = events[0].data["notes"]
    assert notes == [{"note": "Tanqueray"}, {"note": "47%"}]
112 | 
113 | 
def test_parse_patient():
    """A ``{F}`` marker sets ``patient``; entries without one have no such key."""
    without_marker, with_marker = parse(
        "# 2020-01-01\n09:30 - 100mcg LSD\n09:30 - {F} 100mcg LSD"
    )[:2]
    assert "patient" not in without_marker.data
    assert with_marker.data["patient"] == "F"
118 | 
119 | 
def test_parse_ratio():
    """A ratio such as ``10:1`` in parentheses is kept as a note."""
    text = """
    19:00 - 1g Some kind of extract (10:1)
    """
    events = parse(text)
    assert len(events) == 1
    extract = events[0]
    assert extract.data["substance"] == "Some kind of extract"
    assert extract.data["notes"][0] == {"note": "10:1"}
128 | 
129 | 
def test_parse_umlaut():
    """Substance names may contain umlauts."""
    events = parse("""20:00 - 4cl Jägermeister""")
    assert len(events) == 1
    assert events[0].data["substance"] == "Jägermeister"
136 | 
137 | 
def test_parse_half_serving():
    """A fractional ``1/2 serving`` with a long subdose list, plus a second dose."""
    text = """20:00 - 1/2 serving Elevate Pre-workout Formula (5g Vitargo + 1.6g Beta-Alanine + 1.5g Citrulline Malate + 1.5g Arginine Alpha Ketoglutarate + 1.25g Trimethylglycine + 1g Taurine + 250mg Glucuronolactone + 200mg L-Tyrosine + 150mg Grape Seed Extract + 125mg Caffeine + 90mg ACTINOS + 12.5mg Vitamin B6 + 2.5mg Bioperine) + 5g Creatine"""
    events = parse(text)
    assert len(events) == 2
    formula = events[0]
    assert formula.data["substance"] == "Elevate Pre-workout Formula"
    assert formula.data["dose"]["unit"] == "serving"
    # TODO: the parsed amount (expected 0.5) is not asserted yet
146 | 
147 | 
def test_parse_dayheader_title():
    """A day header followed by a free-text title parses without raising."""
    header = """# 2022-08-03 - Just some example title"""
    parse(header)
152 | 
153 | 
def test_parse_probiotic_cfu():
    """``30B CFU`` in parentheses is parsed as a subdose with unit ``B``."""
    events = parse("""10:00 - 1x Probiotic (30B CFU)""")
    assert len(events) == 1
    data = events[0].data
    assert data["substance"] == "Probiotic"
    assert data["dose"]["amount"] == 1
    assert data["dose"]["unit"] == "x"
    expected_subdoses = [{"substance": "CFU", "dose": {"amount": 30, "unit": "B"}}]
    assert data["subdoses"] == expected_subdoses
    # NOTE: treating "30B CFU" as a note instead is deliberately not asserted
166 | 
167 | 
168 | # Parse to node tests
169 | 
170 | 
@pytest.mark.run(order=0)
def test_parse_node_dayheader():
    """Day headers parse with and without zero-padded month and day."""
    for header in ("# 2020-1-1", "# 2020-01-01"):
        assert parse_to_node(header, rule="day_header")
175 | 
176 | 
@pytest.mark.run(order=0)
def test_parse_node_entry():
    """Single entries parse under the ``entry`` rule."""
    for entry in ("10:00 - 100mg Caffeine", "10:00 - 1 cup Coffee"):
        assert parse_to_node(entry, rule="entry")
181 | 
182 | 
@pytest.mark.run(order=0)
def test_parse_node_full():
    """One or several entries parse under the ``entries`` rule."""
    single = "10:00 - 100mg Caffeine"
    several = "10:00 - 1 cup Coffee\n11:00 - 50mg Caffeine"
    assert parse_to_node(single, rule="entries")
    assert parse_to_node(several, rule="entries")
187 | 
188 | 
@pytest.mark.run(order=0)
def test_parse_node_unknown():
    """An unknown amount written as ``?`` parses under the ``entry`` rule."""
    node = parse_to_node("10:00 - ?dl Coffee", rule="entry")
    assert node
192 | 
193 | 
@pytest.mark.run(order=0)
def test_parse_node_approx_time():
    """A ``~`` before the time parses under the ``entry`` rule."""
    node = parse_to_node("~10:00 - 1dl Coffee", rule="entry")
    assert node
197 | 
198 | 
@pytest.mark.run(order=0)
def test_parse_node_approx_amount():
    """A ``~`` before the amount parses under the ``entry`` rule."""
    node = parse_to_node("10:00 - ~1dl Coffee", rule="entry")
    assert node
202 | 
203 | 
@pytest.mark.run(order=0)
def test_parse_node_next_day():
    """A ``+`` before the time parses under the ``entry`` rule."""
    node = parse_to_node("+01:00 - 0.5mg Melatonin", rule="entry")
    assert node
207 | 
208 | 
@pytest.mark.run(order=0)
def test_parse_node_extra():
    """A parenthesised list of doses parses under the ``extra`` rule."""
    node = parse_to_node("(100mg Caffeine + 200mg L-Theanine)", rule="extra")
    assert node
212 | 
213 | 
214 | # Test parse entries
215 | 
216 | 
@pytest.mark.run(order=0)
def test_parse_entries():
    """``parse_entries`` yields one item per entry in the text."""
    one = list(parse_entries("10:00 - 100mg Caffeine"))
    assert len(one) == 1

    two = list(parse_entries("10:00 - 1 cup Coffee\n\n11:00 - 50mg Caffeine"))
    assert len(two) == 2
224 | 
225 | 
def test_parse_decimal():
    """A decimal amount gives a non-empty list of entries."""
    text = """
    19:00 - 3.5g Creatine monohydrate
    """
    entries = list(parse_entries(text))
    assert entries
231 | 
232 | 
def test_parse_percent():
    """A percentage in parentheses gives a non-empty list of entries."""
    text = """
    19:00 - 4cl Drink (8%)
    """
    entries = list(parse_entries(text))
    assert entries
238 | 
239 | 
def test_parse_entries_notes():
    """A plain note followed by a dose carrying a note gives a non-empty result."""
    text = """
    09:30 - Just a plain note

    09:40 - 1x Something (with a note)
    """
    entries = list(parse_entries(text))
    assert entries
247 | 
248 | 
def test_parse_entries_day_example():
    """A small full-day note (header plus two entries) gives a non-empty result."""
    text = """
    # 2020-01-01

    09:30 - 1 cup Coffee (100mg Caffeine + 50mg L-Theanine)

    21:30 - 0.5mg Melatonin subl
    """
    entries = list(parse_entries(text))
    assert entries
258 | 
259 | 
def test_parse_next_day():
    """A ``+`` prefixed time is placed on the day after the header date."""
    text = """
    # 2017-06-08

    10:00 - 100mg Caffeine

    +00:30 - 0.5mg Melatonin subl
    """
    events = parse(text)
    print(events)
    assert len(events) == 2
    same_day, next_day = events
    assert same_day.timestamp == datetime(2017, 6, 8, 10, 0)
    assert next_day.timestamp == datetime(2017, 6, 9, 0, 30)
273 | 
274 | 
def test_parse_continue_on_err():
    """A broken entry yields a ParseError while later entries still parse."""
    text = """
    # 2020-01-01

    08:00 - 1x This will lead to an error ((+)

    09:00 - But this should still parse to a note.
    """
    results = _parse_continue_on_err(text)
    assert len(results) == 2
    broken, note = results

    # the malformed entry is reported as a parse error
    assert isinstance(broken, ParseError)

    # the following entry is an event that still carries the day header's date
    assert isinstance(note, Event)
    assert note.timestamp == datetime(2020, 1, 1, 9, 0)
292 | 


--------------------------------------------------------------------------------
/qslang/load.py:
--------------------------------------------------------------------------------
  1 | #!/bin/env python3
  2 | 
  3 | import itertools
  4 | import json
  5 | import logging
  6 | import os
  7 | import re
  8 | from collections import defaultdict
  9 | from datetime import datetime
 10 | from pathlib import Path
 11 | from typing import Literal
 12 | 
 13 | from .config import load_config
 14 | from .event import Event
 15 | from .filter import filter_events
 16 | from .parsimonious import parse_defer_errors
 17 | from .preprocess import _alcohol_preprocess
 18 | 
 19 | logger = logging.getLogger(__name__)
 20 | 
# Matches a date such as 2020-1-1 or 2020-01-01 (4-digit year, 1-2 digit month and day)
re_date = re.compile(r"[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}")
# Capture what follows ">author:" / ">source:"; since re.MULTILINE is not set,
# "$" anchors at the end of the searched string rather than at each line end
re_evernote_author = re.compile(r">author:(.+)$")
re_evernote_source = re.compile(r">source:(.+)$")


# Directory containing this module; the example notes are located relative to it
base_dir = os.path.dirname(__file__)
 27 | 
 28 | 
 29 | def load_events(
 30 |     start: datetime | None = None,
 31 |     end: datetime | None = None,
 32 |     substances: list[str] = [],
 33 |     sources: None
 34 |     | (
 35 |         list[Literal["standardnotes"] | Literal["evernote"] | Literal["example"]]
 36 |     ) = None,
 37 | ) -> list[Event]:
 38 |     """
 39 |     Load events from various sources.
 40 | 
 41 |     Sources can be:
 42 |     - standardnotes
 43 |     - evernote
 44 |     - example
 45 | 
 46 |     If set to None, all sources will be attempted.
 47 |     """
 48 |     events: list[Event] = []
 49 | 
 50 |     # NOTE: Many notes are duplicated (due to conflicts),
 51 |     # so we will end up with duplcate events that we have to deal with.
 52 | 
 53 |     if sources is None or "standardnotes" in sources:
 54 |         logger.info("Loading standardnotes...")
 55 |         new_events = notes_to_events(_load_standardnotes())
 56 |         logger.info(f"Loaded {len(new_events)} from standardnotes")
 57 |         events += new_events
 58 | 
 59 |     if sources is None or "evernote" in sources:
 60 |         logger.info("Loading evernote...")
 61 |         new_events = notes_to_events(_load_evernotes())
 62 |         logger.info(f"Loaded {len(new_events)} from evernote")
 63 |         events += new_events
 64 | 
 65 |     if not events:
 66 |         logger.warning("No events found, falling back to example data")
 67 |     if not events or (sources and "example" in sources):
 68 |         new_events = notes_to_events(_load_example_notes())
 69 |         logger.info(f"Loaded {len(new_events)} from example data")
 70 |         events += new_events
 71 | 
 72 |     events = _extend_substance_abbrs(events)
 73 |     events = _tag_substances(events)
 74 |     events = sorted(events)
 75 |     events = filter_events(events, start, end, substances)
 76 |     events = _alcohol_preprocess(events)
 77 | 
 78 |     # sanity checks
 79 |     illegal_chars = ["(", ")", "/"]
 80 |     for e in events:
 81 |         for char in illegal_chars:
 82 |             if e.substance and char in e.substance:
 83 |                 logger.warning(
 84 |                     f"Substance '{e.substance}' contained illegal char '{char}' (entry time: {e.timestamp})"
 85 |                 )
 86 | 
 87 |     # remove events before 1970 since they are clearly wrong
 88 |     idxs = []
 89 |     for i, e in enumerate(events):
 90 |         if e.timestamp.year < 1970:
 91 |             idxs.append(i)
 92 |     if idxs:
 93 |         logger.warning(f"Removing {len(idxs)} events before 1970: {events[idxs[0]]}...")
 94 |     for i in reversed(idxs):
 95 |         del events[i]
 96 | 
 97 |     return events
 98 | 
 99 | 
def notes_to_events(notes: list[str]) -> list[Event]:
    """
    Turns raw notes into events

    - Collects errors (a summary and the first three are logged)
    - Deals with duplicates (the first occurrence is kept, order is preserved)
    """
    logger.debug("Converting to events...")
    events: list[Event] = []
    errors = []
    for note in notes:
        note_events, note_errors = parse_defer_errors(note)
        events += note_events
        errors += note_errors
    if errors:
        total = len(events) + len(errors)
        logger.warning(
            f"Found {len(errors)} ({len(errors) / total * 100:.2f}%) errors when parsing {total} notes"
        )
        logger.warning("First 3 errors")
        for e in errors[:3]:
            # logger.exception() only works inside an ``except`` block; here
            # the collected error is passed explicitly so its own traceback
            # (if it is an exception) is what gets logged.
            logger.error(e, exc_info=e if isinstance(e, BaseException) else None)

    # remove duplicate events, keeping first-seen order so output is deterministic
    events_pre = len(events)
    events = list(dict.fromkeys(events))
    if len(events) != events_pre:
        logger.warning("Removed duplicate events: %d -> %d", events_pre, len(events))

    return events
130 | 
131 | 
def _get_notes_dir() -> Path | None:
    """Return the user-expanded ``data.standardnotes`` path from the config, or None."""
    configured = load_config().get("data", {}).get("standardnotes", None)
    return None if configured is None else Path(configured).expanduser()
138 | 
139 | 
def _get_export_file() -> Path | None:
    """Return the user-expanded ``data.standardnotes_export`` path from the config, or None."""
    configured = load_config().get("data", {}).get("standardnotes_export", None)
    return None if configured is None else Path(configured).expanduser()
146 | 
147 | 
def _load_standardnotes() -> list[str]:
    """Load Standard Notes data via the directory loader or the export-file loader.

    Which loader is used depends on the config: ``data.standardnotes``
    selects the directory loader, ``data.standardnotes_export`` the export
    file loader. Returns an empty list (with a warning) when neither is
    configured or the configured path does not exist.

    Raises:
        ValueError: if both options are configured at the same time.
    """
    # NOTE: no ``assert path_dir`` here: it made the export-only setup and
    # the "nothing configured" fallback below unreachable.
    path_dir = _get_notes_dir()
    path_export = _get_export_file()
    if path_dir and path_export:
        raise ValueError(
            "Both `data.standardnotes` and `data.standardnotes_export` are configured, comment out one of them in config"
        )
    if path_dir and path_dir.exists():
        return _load_dir_notes(path_dir)
    elif path_export and path_export.exists():
        return _load_standardnotes_export()
    else:
        logger.warning("no standardnotes export in config")
        return []
164 | 
165 | 
def _load_standardnotes_export() -> list[str]:
    """Loads a "Standard Notes Backup and Import File.txt" (JSON) file

    Items are visited in title order. Every item having both a title and a
    text, and whose title starts with a date, becomes one note with the
    title as its day header. Returns an empty list when no export file is
    configured; asserts that at least one note was read otherwise.
    """
    # NOTE: Used to be deprecated, but not any longer as standardnotes-fs isn't working as well as it used to (after the standardnotes 004 upgrade)
    path = _get_export_file()
    if path is None:
        logger.warning("no standardnotes export in config")
        return []

    logger.info(f"Loading standardnotes from {path}")
    # JSON is UTF-8; do not depend on the platform's locale encoding
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    notes = []
    for entry in sorted(data["items"], key=lambda e: e["content"].get("title", "")):
        content = entry["content"]
        if "title" in content and "text" in content:
            title = content["title"]
            text = content["text"]
            if re_date.match(title):
                notes.append(f"# {title}\n\n{text}")
        else:
            logger.debug("Unknown note type")

    assert notes, "no notes were read, is the file available and decrypted?"
    return notes
196 | 
197 | 
def _load_dir_notes(path: Path) -> list[str]:
    """
    Read every ``*.md`` and ``*.txt`` file in ``path`` whose name starts with a date.

    A file that already starts with ``#`` is used verbatim; otherwise a day
    header built from the part of the file name before the first dot is
    prepended.

    This used to be called _load_standardnotes_fs, from when
    standardnotes-fs was still functional, and was repurposed since it
    generalizes well.
    """
    notes: list[str] = []
    candidates = itertools.chain(path.glob("*.md"), path.glob("*.txt"))
    for note_path in candidates:
        title = note_path.name.split(".")[0]
        if not re_date.match(title):
            logger.debug("Unknown note type")
            continue

        with open(note_path) as f:
            text = f.read()
            has_header = text.startswith("#")
            notes.append(text if has_header else f"# {title}\n\n{text}")

    return notes
222 | 
223 | 
def _load_example_notes() -> list[str]:
    """Load the notes in ``data/test/notes`` next to the package; asserts some exist."""
    example_dir = Path(base_dir) / ".." / "data" / "test" / "notes"
    example_notes = _load_dir_notes(example_dir)
    assert example_notes
    return example_notes
228 | 
229 | 
def _load_evernotes() -> list[str]:
    """Load Evernote notes from the ``*.md`` files in ``./data/private/Evernote``.

    Only files with a date somewhere in their path are used. Lines starting
    with ``>``, ``---`` or ``##`` are treated as metadata and removed.
    """
    notes = []
    # TODO: read from config
    d = Path("./data/private/Evernote")
    for p in d.glob("*.md"):
        data = p.read_text()

        # NOTE: a filter on the note's ">author:" / ">source:" metadata used
        # to be sketched here but was permanently disabled (considered a bad
        # idea for filtering, though possibly useful for tagging), so the
        # unreachable code has been dropped.

        if not re_date.search(str(p)):
            continue

        # Remove metadata lines
        data = "\n".join(
            line
            for line in data.split("\n")
            if not line.startswith((">", "---", "##"))
        )
        notes.append(data)
    return notes
271 | 
272 | 
def _load_categories() -> dict[str, list[str]]:
    """Returns a dict {category: [substances...]} with substances lowercased.

    The ``categories`` mapping obtained from the config is updated in place.
    """
    categories = load_config().get("categories", {})
    for name, members in categories.items():
        categories[name] = [member.lower() for member in members]
    return categories
280 | 
281 | 
def _substance2categories():
    """Returns the inverted dict of _load_categories: {substance: {categories...}}"""
    inverted = defaultdict(set)
    pairs = (
        (substance, category)
        for category, substances in _load_categories().items()
        for substance in substances
    )
    for substance, category in pairs:
        inverted[substance].add(category)
    return inverted
289 | 
290 | 
def _load_substance_aliases() -> dict[str, list[str]]:
    """Return the ``aliases`` config section: {target name: [aliases to rename to it]}."""
    return load_config().get("aliases", {})
296 | 
297 | 
def _tag_substances(events: list[Event]) -> list[Event]:
    """Attach category tags to events whose substance has configured categories.

    The set of categories is stored under ``data["tags"]`` (lookup is by
    lowercased substance name). Logs how many events with a substance ended
    up tagged. The events are modified in place and the same list is
    returned.
    """
    substance_categories = _substance2categories()
    for e in events:
        if e.substance and e.substance.lower() in substance_categories:
            cats = substance_categories[e.substance.lower()]
            e.data["tags"] = cats
    n_doses = sum(1 for e in events if e.substance)
    n_categorized = sum(1 for e in events if e.tags)
    # Guard on the divisor itself: a non-empty list holding only journal
    # entries has zero doses and would otherwise divide by zero.
    frac_categorized = n_categorized / n_doses if n_doses else 0.0
    logger.info(
        f"Categorized {n_categorized} out of {n_doses} doses ({round(frac_categorized*100, 1)}%)"
    )
    return events
311 | 
312 | 
def _extend_substance_abbrs(events) -> list[Event]:
    """Rename aliased substances to their configured target name.

    Events whose substance matches an alias (case-insensitively) get
    ``event.data["substance"]`` replaced by the alias' target. Events are
    modified in place and the same list is returned.
    """
    # Build the reverse lookup: lowercased alias -> target name.
    alias_to_target = {}
    for target, aliases in _load_substance_aliases().items():
        for alias in aliases:
            alias_to_target[alias.lower()] = target

    for event in events:
        name = event.substance
        if name and name.lower() in alias_to_target:
            event.data["substance"] = alias_to_target[name.lower()]
    return events
323 | 
324 | 
def test_load_events():
    """Smoke test: loading from the ``example`` source yields at least one event."""
    loaded = load_events(sources=["example"])
    print(f"Loaded {len(loaded)} events")
    assert loaded
329 | 


--------------------------------------------------------------------------------
/qslang/parsimonious.py:
--------------------------------------------------------------------------------
  1 | #!/bin/env python3
  2 | """
  3 | A reimplementation of the (somewhat broken) pop-regex parser.
  4 | We will use the parsimonious library to parse the string.
  5 | We will comment step by step how the parser works.
  6 | """
  7 | 
  8 | import logging
  9 | from collections.abc import Generator
 10 | from datetime import (
 11 |     date,
 12 |     datetime,
 13 |     time,
 14 |     timedelta,
 15 | )
 16 | from typing import Any
 17 | 
 18 | import parsimonious
 19 | from parsimonious.nodes import Node, NodeVisitor
 20 | 
 21 | from .event import Event
 22 | 
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
 24 | 
 25 | 
 26 | def flatten(ls: list[Any]) -> list[Any]:
 27 |     """Flatten a list of lists."""
 28 |     if not isinstance(ls, list):
 29 |         raise TypeError("Expected a list")
 30 |     return [item for sublist in ls for item in sublist]
 31 | 
 32 | 
 33 | class ParseError:
 34 |     def __init__(self, e: BaseException, s: str, date: str):
 35 |         self.e = e
 36 |         self.s = s
 37 |         self.date = date
 38 | 
 39 |     def __repr__(self):
 40 |         return f"<ParseError: {self.e}, string: {self.s}, date: {self.date}>"
 41 | 
 42 | 
# Step 1: Create a parsimonious grammar.
# The grammar is meant to parse log lines such as:
#   "08:20 - 1x Something (50mg Caffeine + 100mg L-Theanine)"
# It is a series of named rules, each built from literals, regexes (~"...")
# and references to other rules:
#   - `entries` is the top-level rule: an optional `day_header` followed by
#     any number of `entry` lines.
#   - an `entry` is a time (optionally prefixed by `~` for approximate and/or
#     `+` for next day), a dash, and then either a `dose_list` or a free-text `note`.
#   - a `dose` is: optional `{patient}`, an amount, a substance, an optional
#     parenthesised `extra` and an optional route of administration (`roa`).
# The visitor methods of `Visitor` below are named after these rules.
grammar = parsimonious.Grammar(
    r"""
    entries     = day_header? ws (entry)*

    day_header  = '#' ws date (ws "-" ws ~"[a-z0-9 ]+"i)? nl?
    entry       = ws time_prefix* time ws "-" ws entry_data ws nl?
    entry_data  = dose_list / note
    note        = ~"[A-Z][^\n]+"i

    date        = ~"[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}"
    time        = ~"[0-9?]{1,2}:[0-9?]{1,2}"
    time_prefix = approx / next_day

    ws          = ~"[ ]*"
    nl          = ~"\n+"

    dose        = patient? ws amount ws substance ws extra? ws roa?
    dose_list   = dose (ws "+" ws dose)*
    patient     = "{" ~"[a-z]+"i "}"
    amount      = (unknown ws unit?) / (approx? fraction ws unit?) / (approx? number ws unit?)
    number      = ~"[0-9]+[.]?[0-9]*"
    unit        = prefixlessunit / (siprefix? baseunit)
    prefixlessunit = "cup" / "x" / "IU" / "GDU" / "B" / "serving" / ~"puff(s)?"
    siprefix    = "n" / "u" / "mc" / "m" / "c" / "d"
    baseunit    = "g" / "l"
    substance   = ~"[a-z0-9\-äåö]+"i (ws !roa ~"[a-z0-9\-åäö]+"i)*
    extra       = "(" extra_data (ws "," ws extra_data)* ")"
    extra_data  = percent / dose_list / short_note
    short_note  = ratio? ws ~"[A-Z][^,)\n]+"i?
    ratio       = ~"[0-9]+:[0-9]+"
    fraction    = ~"[0-9]+\/[0-9]+"
    percent     = ~"[>]"? number "%" ws substance?
    roa         = "oral" / ~"vap(ed|orized)?" / "intranasal" / ~"insuff(lated)?" / ~"subcut(aneous)?" / ~"subl(ingual)?" / "smoked" / "spliff" / "inhaled" / "buccal" / "rectal"

    approx = "~"
    unknown = "?"
    next_day = "+"
    """
)
 86 | 
 87 | 
 88 | def parse(s: str) -> list[Event]:
 89 |     visitor = Visitor()
 90 |     visitor.grammar = grammar
 91 |     events: list[Event] = visitor.parse(s.strip())  # type: ignore
 92 |     return events
 93 | 
 94 | 
 95 | def parse_defer_errors(s: str) -> tuple[list[Event], list[ParseError]]:
 96 |     """
 97 |     Tries to parse strings into a list of events.
 98 |     If some entries can't be read: store the resulting errors in a list.
 99 | 
100 |     returns both the events and errors.
101 |     """
102 |     entries: list[Event | ParseError] = _parse_continue_on_err(s)
103 |     events = []
104 |     errors = []
105 |     for e in entries:
106 |         if isinstance(e, Event):
107 |             events.append(e)
108 |         elif isinstance(e, ParseError):
109 |             # logger.warning(f"Error while parsing: {e}")
110 |             errors.append(e)
111 |         else:
112 |             print(e)
113 |             raise TypeError(f"Unexpected type: {type(e)}")
114 |     # check how many have 1900-1-1 as date
115 |     n_no_date = len([e for e in events if e.timestamp.date() <= date(1901, 1, 1)])
116 |     if n_no_date:
117 |         logger.warning(f"{n_no_date} events have no date")
118 |     return events, errors
119 | 
120 | 
def parse_to_node(string, rule=None) -> Node:
    """Parse ``string`` into a raw parse tree, without visiting it.

    When ``rule`` is given, parsing starts from that grammar rule instead of
    the grammar's default rule.
    """
    start = grammar if rule is None else grammar[rule]
    return start.parse(string)
128 | 
129 | 
def parse_entries(s: str) -> Generator[Node, None, None]:
    """
    Parse ``s`` one line at a time rather than as a whole.

    Blank lines are skipped. Lines starting with ``#`` are parsed with the
    ``day_header`` rule, all others with the ``entry`` rule.
    Yields one ``parsimonious.nodes.Node`` per non-blank line.
    """
    for raw_line in s.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        rule = "day_header" if line[0] == "#" else "entry"
        yield parse_to_node(line, rule=rule)
142 | 
143 | 
class Visitor(NodeVisitor):
    """Turn a parse tree produced by ``grammar`` into ``Event`` objects.

    Each ``visit_<rule>`` method receives the matched node together with the
    already-visited results of its children (in grammar order) and returns
    the Python value that represents that rule.
    """

    def generic_visit(self, node, visited_children) -> list:
        """Fallback for nodes that have no dedicated ``visit_*`` method.

        Named rules that end up here are logged as a hint that a visitor is
        missing; in every case the children's results are passed through.
        """
        if node.expr_name:
            logger.warning(f"GENERIC HIT: {node.expr_name}   {visited_children}")
        return visited_children

    def visit_entries(self, node, visited_children) -> list[Event]:
        """Collect the events of all entries, applying the day header if present.

        When a day header was parsed, each event's timestamp is moved to that
        day (keeping its time of day). Events flagged ``next_day`` have the
        flag removed from their data and one day added to their timestamp.
        """
        day_header, _, entries = visited_children
        day = None

        # The optional day header is an empty list when absent.
        if day_header:
            (day,) = day_header
            assert isinstance(day, date)

        events: list[Event] = []
        for entry in entries:
            for event in entry:
                if not event:
                    continue
                assert isinstance(event, Event)
                if day:
                    event.timestamp = datetime.combine(day, event.timestamp.time())
                if event.data.pop("next_day", None):
                    event.timestamp += timedelta(days=1)
                events.append(event)

        for e in events:
            assert isinstance(e, Event)

        return events

    def visit_entry(self, node, visited_children, day=None) -> list[Event]:
        """Build one ``Event`` per dose (or a single one for a note) of an entry.

        Without ``day`` the placeholder date 1900-01-01 is used. Time prefixes
        are recorded on each event's data as ``next_day`` / ``approx`` flags.

        Raises:
            ValueError: on an unrecognized time prefix.
        """
        _, time_prefix, time, _, _, _, entries, _, _ = visited_children

        if day is None:
            day = date(1900, 1, 1)

        timestamp = datetime.combine(day, time)

        events = []
        for data in entries:
            for (p,) in time_prefix:
                if p == "next_day":
                    data["next_day"] = True
                elif p == "approx":
                    data["approx"] = True
                else:
                    raise ValueError(f"Unknown time prefix: {p}")

            events.append(
                Event(
                    timestamp=timestamp,
                    # Only doses carry a substance; everything else is a journal note.
                    type="dose" if "substance" in data else "journal",
                    data=data,
                )
            )
        return events

    def visit_day_header(self, node, visited_children) -> date:
        """Return the date of a ``# YYYY-MM-DD ...`` header line."""
        _, _, day, *_ = visited_children
        assert isinstance(day, date)
        return day

    def visit_approx(self, node, visited_children) -> str:
        """Return the marker string for the ``~`` (approximate) prefix."""
        return "approx"

    def visit_next_day(self, node, visited_children) -> str:
        """Return the marker string for the ``+`` (next day) prefix."""
        return "next_day"

    def visit_unknown(self, node, visited_children) -> str:
        """Return the marker string for the ``?`` (unknown amount) token."""
        return "unknown"

    def visit_entry_data(self, node, visited_children) -> list[dict[str, Any]]:
        """Normalize entry data to a list of dicts.

        A dose list is returned as is; a single note dict is wrapped in a list.

        Raises:
            ValueError: if the child is neither a list nor a dict.
        """
        doses_or_note = visited_children[0]
        if isinstance(doses_or_note, list):
            return doses_or_note
        elif isinstance(doses_or_note, dict):
            return [doses_or_note]
        else:
            raise ValueError(f"Unknown entry data: {doses_or_note}")

    def visit_extra_data(self, node, visited_children) -> dict:
        """Sort one item of a parenthesised extra into notes and subdoses.

        Dict children (percent / short note) become notes, list children
        (dose lists) become subdoses.

        Raises:
            ValueError: if a child is neither a dict nor a list.
        """
        _notes = []
        _subdoses = []
        for child in visited_children:
            if isinstance(child, dict):
                _notes.append(child)
            elif isinstance(child, list):
                _subdoses.extend(child)
            else:
                raise ValueError(f"Unknown child type: {child}")

        for c in _notes:
            assert "note" in c
        for c in _subdoses:
            assert "substance" in c

        return {"notes": _notes, "subdoses": _subdoses}

    def visit_dose_list(self, node, visited_children) -> list[dict[str, Any]]:
        """Return the doses of a ``dose + dose + ...`` list as a flat list of dicts."""
        first_dose, more_doses = visited_children
        doses = [first_dose]
        for c in more_doses:
            if c:
                # Each repetition is: ws "+" ws dose
                _, _, _, dose = c
                doses.append(dose)

        assert all(isinstance(d, dict) for d in doses)
        return doses

    def visit_dose(self, node, visited_children) -> dict[str, Any]:
        """Assemble the dict describing a single dose.

        The result always has ``substance`` and ``dose`` (amount/unit, plus
        ``roa`` when given); ``patient``, ``notes`` and ``subdoses`` are only
        present when the entry provided them.
        """
        patient, _, dose, _, substance, a1, extras, a2, roa = visited_children
        # a1/a2 are whitespace matches, which visit_ws maps to None.
        assert a1 is None
        assert a2 is None
        d = {
            "substance": substance,
            "dose": {**dose},
            "subdoses": [],
            "notes": [],
        }

        if roa:
            d["dose"]["roa"] = roa[0]
        if patient:
            d["patient"] = patient[0]

        if extras:
            for e in extras:
                d["notes"].extend(e["notes"])
                d["subdoses"].extend(e["subdoses"])

        # Drop the placeholders again when nothing was collected.
        if not d["notes"]:
            del d["notes"]
        if not d["subdoses"]:
            del d["subdoses"]

        return d

    def visit_date(self, node, visited_children) -> date:
        """Parse a ``YYYY-MM-DD`` date."""
        return datetime.strptime(node.text, "%Y-%m-%d").date()

    def visit_time(self, node, visited_children) -> time:
        """Parse an ``HH:MM`` time; the fully unknown ``??:??`` becomes 00:00.

        Raises:
            ValueError: for any other text ``strptime`` cannot read as
                ``%H:%M`` (e.g. a partially unknown time such as ``1?:30``).
        """
        if node.text == "??:??":
            logger.warning("Entry with unknown time, assuming 00:00")
            return time(0, 0)
        return datetime.strptime(node.text, "%H:%M").time()

    def visit_extra(self, node, visited_children) -> dict:
        """Merge all comma-separated items of a parenthesised extra into one dict.

        Returns a dict with the combined ``notes`` and ``subdoses`` lists.
        """
        # Children are: "(" extra_data (ws "," ws extra_data)* ")"
        _, extra, more, closing = visited_children
        assert closing == []
        for c in more:
            _, _, _, more_extra = c
            extra["notes"].extend(more_extra["notes"])
            extra["subdoses"].extend(more_extra["subdoses"])

        return extra

    def visit_note(self, node, visited_children) -> dict:
        """Wrap a free-text note in a ``{"note": text}`` dict."""
        return {"note": node.text}

    def visit_short_note(self, node, visited_children) -> dict:
        """Wrap a note inside an extra in a ``{"note": text}`` dict."""
        return {"note": node.text}

    def visit_ratio(self, node, visited_children) -> str:
        """Return the ratio (e.g. ``1:1``) as matched text."""
        return node.text

    def visit_siprefix(self, node, visited_children) -> str:
        """Return the SI prefix as matched text."""
        return node.text

    def visit_baseunit(self, node, visited_children) -> str:
        """Return the base unit as matched text."""
        return node.text

    def visit_amount(self, node, visited_children) -> dict[str, Any]:
        """Return ``{"amount": ..., "unit": ...}`` for an amount.

        ``amount`` is a float, or the string ``"unknown"`` for ``?``; ``unit``
        is ``"unknown"`` when no unit was given. ``approx`` is set to True
        when the amount was prefixed with ``~``.

        Raises:
            ValueError: if the matched alternative has an unexpected shape.
        """
        # The single child is whichever alternative of the rule matched.
        visited_children = visited_children[0]
        if len(visited_children) == 4:
            # approx? (fraction | number) ws unit?
            (approx, amount, _, unit) = visited_children
            d = {
                "amount": amount,
                "unit": unit[0] if unit else "unknown",
            }
            if approx:
                d["approx"] = True
            return d
        elif len(visited_children) == 3:
            # unknown ws unit?
            (_unknown, _ws, unit) = visited_children
            return {"amount": "unknown", "unit": unit[0] if unit else "unknown"}
        else:
            raise ValueError(f"Unknown amount: {visited_children}")

    def visit_unit(self, node, visited_children) -> str:
        """Return the full unit (prefix included) as matched text."""
        return node.text

    def visit_prefixlessunit(self, node, visited_children) -> str:
        """Return the prefix-less unit as matched text."""
        return node.text

    def visit_number(self, node, visited_children) -> float:
        """Convert the matched number to a float."""
        return float(node.text)

    def visit_substance(self, node, visited_children) -> str:
        """Return the substance name as matched text."""
        return node.text

    def visit_roa(self, node, visited_children) -> str:
        """Return the route of administration as matched text."""
        return node.text

    def visit_patient(self, node, visited_children) -> str:
        """Return the patient name without the surrounding braces."""
        return node.text[1:-1]

    def visit_percent(self, node, visited_children) -> dict[str, Any]:
        """Keep a percentage annotation as a ``{"note": text}`` dict."""
        return {"note": node.text}

    def visit_fraction(self, node, visited_children) -> float:
        """Return the value of a ``numerator/denominator`` fraction.

        Raises:
            ZeroDivisionError: if the denominator is zero.
        """
        # The grammar only matches ``digits/digits``, so parse the two integers
        # explicitly instead of calling ``eval`` on text taken from the notes.
        # Unlike ``eval``, this also accepts leading zeros such as ``01/2``.
        numerator, denominator = node.text.split("/")
        return int(numerator) / int(denominator)

    def visit_time_prefix(self, node, visited_children) -> list[str]:
        """Return the one-element list holding the prefix marker string."""
        return visited_children

    def visit_ws(self, node, visited_children) -> None:
        """Whitespace carries no information."""
        return None

    def visit_nl(self, node, visited_children) -> None:
        """Newlines carry no information."""
        return None
371 | 
372 | 
373 | def _parse_continue_on_err(s: str) -> list[Event | ParseError]:
374 |     """
375 |     We want to parse events row by row, so we can handle errors (which ``parse`` cannot).
376 | 
377 |     To do this, we need to parse line by line, returning errors with correct timestamps
378 |     determined by previous day header. If an event cannot be read, return an 'ParseError'
379 |     instead, for filtering by the caller.
380 |     """
381 |     entries: list[Event | ParseError] = []
382 |     day_header = ""
383 |     for line in s.splitlines():
384 |         line = line.strip()
385 | 
386 |         # skip empty lines
387 |         if not line:
388 |             continue
389 | 
390 |         if line.startswith("# 20"):  # assumption will break for dates >=2100-1-1
391 |             day_header = line
392 |             continue
393 | 
394 |         try:
395 |             events = parse(day_header + "\n" + line)
396 |             if events:
397 |                 entries.extend(events)
398 |         except Exception as e:
399 |             # Useful in testing to get stacktraces
400 |             # logger.exception(e)
401 |             entries.append(ParseError(e, line, day_header[2:]))
402 | 
403 |     return entries
404 | 


--------------------------------------------------------------------------------
/qslang/main.py:
--------------------------------------------------------------------------------
  1 | #!/bin/env python3
  2 | 
  3 | import json
  4 | import logging
  5 | import statistics
  6 | from collections import Counter, defaultdict
  7 | from datetime import (
  8 |     date,
  9 |     datetime,
 10 |     time,
 11 |     timedelta,
 12 |     timezone,
 13 | )
 14 | from itertools import groupby
 15 | 
 16 | import calplot
 17 | import click
 18 | import matplotlib.pyplot as plt
 19 | import numpy as np
 20 | import pandas as pd
 21 | import pint
 22 | 
 23 | from . import (
 24 |     Dose,
 25 |     Event,
 26 |     load_events,
 27 |     print_events,
 28 | )
 29 | from .avg_times import mean_time
 30 | from .config import load_config, set_global_testing
 31 | from .igroupby import igroupby
 32 | from .pharmacokinetics import effectspan as _effectspan
 33 | from .util import dayrange, monthrange
 34 | 
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Offset subtracted from a timestamp before taking its date when grouping
# doses by day, so that doses taken before 04:00 count towards the previous day.
# TODO: Make configurable
start_of_day = timedelta(hours=4)
 39 | 
 40 | 
 41 | @click.group()
 42 | @click.option("-v", "--verbose", is_flag=True)
 43 | @click.option("--testing", is_flag=True, help="run with testing config & data")
 44 | def main(verbose=False, testing=True):
 45 |     """QSlang is a tool to parse and analyze dose logs, for science."""
 46 |     logging.basicConfig(
 47 |         level=logging.DEBUG if verbose else logging.INFO,
 48 |         format="%(levelname).4s | %(module)-8s |  %(message)s",
 49 |     )
 50 | 
 51 |     if testing:
 52 |         set_global_testing()
 53 |     load_config()
 54 | 
 55 | 
 56 | @main.command(help="print list of all doses")
 57 | @click.option(
 58 |     "--start", type=click.DateTime(["%Y-%m-%d"]), help="start date to filter events by"
 59 | )
 60 | @click.option(
 61 |     "--end", type=click.DateTime(["%Y-%m-%d"]), help="end date to filter events by"
 62 | )
 63 | @click.option("--substances", help="substances to filter by (comma-separated)")
 64 | def events(start: datetime, end: datetime, substances: str | None):
 65 |     substances_list = substances.split(",") if substances else []
 66 |     events = load_events(start, end, substances_list)
 67 |     print_events(events)
 68 | 
 69 | 
 70 | @main.command(help="print summary of doses for each substance")
 71 | @click.option(
 72 |     "--start", type=click.DateTime(["%Y-%m-%d"]), help="start date to filter events by"
 73 | )
 74 | @click.option(
 75 |     "--end", type=click.DateTime(["%Y-%m-%d"]), help="end date to filter events by"
 76 | )
 77 | @click.option("--substances", help="substances to filter by (comma-separated)")
 78 | def summary(start: datetime, end: datetime, substances: str) -> None:
 79 |     # TODO: rename function to something more descriptive, like 'summary'?
 80 |     substances_list = substances.split(",") if substances else []
 81 |     events = load_events(start, end, substances_list)
 82 |     events = [e for e in events if e.substance]
 83 | 
 84 |     if events:
 85 |         for substance, substance_events in igroupby(
 86 |             events, lambda e: e.substance
 87 |         ).items():
 88 |             assert substance
 89 |             _print_daily_doses(substance_events, substance)
 90 |     else:
 91 |         print("No matching events found")
 92 | 
 93 | 
 94 | @main.command(help="print effect spans")
 95 | @click.option(
 96 |     "--start", type=click.DateTime(["%Y-%m-%d"]), help="start date to filter events by"
 97 | )
 98 | @click.option(
 99 |     "--end", type=click.DateTime(["%Y-%m-%d"]), help="end date to filter events by"
100 | )
101 | @click.option("--substances", help="substances to filter by (comma-separated)")
102 | @click.option("--normalize", help="consider all substances a particular substance")
103 | def effectspan(start: datetime, end: datetime, substances: str, normalize: str):
104 |     substances_list = substances.split(",") if substances else []
105 |     events = load_events(start, end, substances_list)
106 |     events = [e for e in events if e.substance]
107 | 
108 |     if normalize:
109 |         for e in events:
110 |             e.data["substance"] = normalize
111 | 
112 |     if events:
113 |         effectspans = _effectspan(
114 |             [
115 |                 (e.timestamp.replace(tzinfo=timezone.utc), e.dose)
116 |                 for e in events
117 |                 if e.dose
118 |             ]
119 |         )
120 |         for span in effectspans:
121 |             # will break horribly if any ; in output
122 |             data = span.data
123 |             data["doses"] = [{"amount": d.amount_with_unit} for d in data["doses"]]
124 |             print(
125 |                 "; ".join(
126 |                     [
127 |                         span.timestamp.isoformat(),
128 |                         str(span.duration.total_seconds()),
129 |                         json.dumps(data),
130 |                     ]
131 |                 )
132 |             )
133 |     else:
134 |         print("No matching events found")
135 | 
136 | 
def _split_bar_at_midnight(day, start_hour, hours):
    """Split one bar into per-day pieces so that none extends past hour 24.

    ``day`` is the date the bar starts on, ``start_hour`` its start as a
    fractional hour of that day and ``hours`` its length in hours.
    Returns a list of ``(day, start_hour, hours)`` tuples, one per day touched.
    """
    pieces = []
    while start_hour + hours > 24:
        until_midnight = 24 - start_hour
        pieces.append((day, start_hour, until_midnight))
        hours -= until_midnight
        day += timedelta(days=1)
        start_hour = 0.0
    pieces.append((day, start_hour, hours))
    return pieces


@main.command(help="plot effect spans in a barchart")
@click.option(
    "--start", type=click.DateTime(["%Y-%m-%d"]), help="start date to filter events by"
)
@click.option(
    "--end", type=click.DateTime(["%Y-%m-%d"]), help="end date to filter events by"
)
@click.option("--substances", help="substances to filter by (comma-separated)")
def plot_effectspan(start, end, substances):
    """Plot the effect spans of the matching doses as bars, one series per substance.

    The x-axis is the date, the (inverted) y-axis the hour of day. Spans that
    cross midnight are split into one bar per day. Nothing is plotted when no
    event matches.
    """
    substances_list = substances.split(",") if substances else []
    events = load_events(start, end, substances_list)
    events = [e for e in events if e.substance]

    bars_by_substance = {}

    if events:
        effectspans = _effectspan(
            [
                (e.timestamp.replace(tzinfo=timezone.utc), e.dose)
                for e in events
                if e.dose
            ]
        )

        # there can be multiple spans per day; bars are grouped by substance
        for substance in {span.data["substance"] for span in effectspans}:
            # list of bars in matplotlib order: (x, height, bottom)
            bars_mpl = []
            for span in effectspans:
                if span.data["substance"] != substance:
                    continue
                started = span.timestamp.time()
                # Use the fractional hour (not just .hour) so that both the
                # bar's bottom and the midnight split account for minutes.
                start_hour = (
                    started.hour + started.minute / 60 + started.second / 3600
                )
                pieces = _split_bar_at_midnight(
                    span.timestamp.date(),
                    start_hour,
                    span.duration.total_seconds() / 3600,
                )
                for day, bottom, height in pieces:
                    bars_mpl.append((day, height, bottom))
            bars_by_substance[substance] = bars_mpl

        # plot
        fig, ax = plt.subplots()
        ax.set_xlabel("Date")
        ax.set_ylabel("Hour")

        for subst, subst_bars in bars_by_substance.items():
            x, height, bottom = zip(*subst_bars)
            ax.bar(
                x,
                height,
                bottom=bottom,
                label=subst,
            )
        # invert axis such that each day starts at the top
        ax.set_ylim(0, 24)
        ax.invert_yaxis()
        plt.title("Effectspans where subject is under influence of substance")
        plt.legend()
        plt.show()
224 | 
225 | 
@main.command(help="plot percent of time spent under effects of a substance")
@click.option(
    "--start", type=click.DateTime(["%Y-%m-%d"]), help="start date to filter events by"
)
@click.option(
    "--end", type=click.DateTime(["%Y-%m-%d"]), help="end date to filter events by"
)
@click.option("--substances", help="substances to filter by (comma-separated)")
def plot_influence(start, end, substances):
    """Plot, per substance, the hours per day spent under its effects.

    Draws one bar per day plus a 7-day moving average line for each
    substance. Nothing is plotted when no event matches.
    """
    substances_list = substances.split(",") if substances else []
    events = load_events(start, end, substances_list)
    events = [e for e in events if e.substance]

    if events:
        effectspans = _effectspan(
            [
                (e.timestamp.replace(tzinfo=timezone.utc), e.dose)
                for e in events
                if e.dose
            ]
        )

        # count the number of hours spent under the influence of each substance, by day
        # we will build a dict of {substance: {date: hours}}
        # TODO: Handle spans that cross the day boundary
        hours_by_substance_by_day: dict[str, dict[date, float]] = defaultdict(
            lambda: defaultdict(float)
        )
        for span in effectspans:
            substance = span.data["substance"]
            # Respect the 'start of day' setting when assigning a span to a day.
            day = (span.timestamp - start_of_day).date()
            hours_by_substance_by_day[substance][day] += (
                span.duration.total_seconds() / 3600
            )

        # plot
        fig, ax = plt.subplots()
        ax.set_xlabel("Date")
        ax.set_ylabel("Hours")

        # plot bars
        for subst, hours_by_day in hours_by_substance_by_day.items():
            x = list(hours_by_day.keys())
            y = list(hours_by_day.values())
            ax.bar(x, y, label=subst)

            # plot a line for the moving average, with pandas
            # The index is converted to datetimes so that it lines up with the
            # DatetimeIndex from date_range; plain ``date`` objects would not
            # match it on reindex and every day would be filled with 0.
            df = pd.DataFrame({"hours": y}, index=pd.to_datetime(x))
            df = df.reindex(
                pd.date_range(start=df.index.min(), end=df.index.max()), fill_value=0
            )
            df["rolling"] = df["hours"].rolling(7).mean()
            ax.plot(df.index, df["rolling"], label=f"{subst} 7D MA")

        ax.set_ylim(0, 24)
        plt.title("Hours spent under the influence of substance")
        plt.legend()
        plt.show()
285 | 
286 | 
@main.command(help="print list of substances")
@click.option(
    "--start", type=click.DateTime(["%Y-%m-%d"]), help="start date to filter events by"
)
@click.option(
    "--end", type=click.DateTime(["%Y-%m-%d"]), help="end date to filter events by"
)
@click.option("--substances", help="substances to filter by (comma-separated)")
@click.option(
    "--group-day",
    is_flag=True,
    help="group by day, counting each day with a dose instead of each dose",
)
def substances(start, end, substances, group_day=True) -> None:
    """Print the matching substances, most frequent first, with their counts.

    The count is the number of doses, or with ``group_day`` the number of
    distinct dates on which the substance was dosed.
    """
    wanted = substances.split(",") if substances else []
    dose_events = [e for e in load_events(start, end, wanted) if e.substance]

    counts = Counter()
    for name, group in igroupby(dose_events, lambda e: e.substance).items():
        if group_day:
            # one count per distinct calendar date
            counts[name] = len({e.timestamp.date() for e in group})
        else:
            counts[name] = len(group)

    for name, n in counts.most_common():
        print(f"{n}x\t{name}")
    print(f"{len(counts)} substances found")
320 | 
321 | 
@main.command(help="plot doses over time in a barchart")
@click.option(
    "--start", type=click.DateTime(["%Y-%m-%d"]), help="start date to filter events by"
)
@click.option(
    "--end", type=click.DateTime(["%Y-%m-%d"]), help="end date to filter events by"
)
@click.option("--substances", help="substances to filter by (comma-separated)")
@click.option("--any", is_flag=True, help="count all matches as any match")
@click.option("--daily", is_flag=True, help="use daily resolution on the x-axis")
@click.option("--count", is_flag=True, help="count number of doses instead of amount")
@click.option("--days", is_flag=True, help="count number of days with doses")
def plot(
    start: datetime | None,
    end: datetime | None,
    substances: str,
    any: bool,
    daily: bool,
    count: bool,
    days: bool,
):
    """Load the matching events and hand them to the frequency plot.

    ``--days`` implies counting: it enables both ``count`` and ``one_per_day``.
    """
    wanted = substances.split(",") if substances else []
    matching = load_events(start, end, wanted)
    use_counts = count or days
    _plot_frequency(
        matching,
        count=use_counts,
        one_per_day=days,
        daily=daily,
        any_substance=any,
    )
348 | 
349 | 
@main.command(help="plot doses in a calendar")
@click.option(
    "--start", type=click.DateTime(["%Y-%m-%d"]), help="start date to filter events by"
)
@click.option(
    "--end", type=click.DateTime(["%Y-%m-%d"]), help="end date to filter events by"
)
@click.option("--substances", help="substances to filter by (comma-separated)")
def plot_calendar(start: datetime | None, end: datetime | None, substances: str):
    """Load the matching events and hand them to the calendar plot."""
    wanted = [] if not substances else substances.split(",")
    _plot_calendar(load_events(start, end, wanted))
362 | 
363 | 
def _print_daily_doses(
    events: list[Event], substance: str, ignore_doses_fewer_than=None
):
    """Print a per-substance summary of dosing to stdout.

    Only events whose substance matches ``substance`` (case-insensitively) and
    that carry a dose are considered. The summary covers the latest/oldest
    day, the number of days and the total amount, the average amount per day,
    the average time of the first/last dose of a day, min/median/max dose and
    a count per route of administration.

    Args:
        events: events to summarise (filtered internally).
        substance: name of the substance to report on.
        ignore_doses_fewer_than: if truthy and larger than the number of days
            with matching events, nothing is printed.
    """
    events = [
        e
        for e in events
        if e.substance and e.substance.lower() == substance.lower() and e.dose
    ]
    if not events:
        logger.info(f"No doses found for substance '{substance}'")
        return

    # NOTE: Respects the 'start of day' setting when grouping by date
    grouped_by_date = igroupby(
        sorted(events), key=lambda e: (e.timestamp - start_of_day).date()
    )
    assert events[0].dose
    # outer accumulator (all days)
    tot_amt = Dose(substance, events[0].dose.quantity * 0)
    for day_events in grouped_by_date.values():
        valid_doses = [
            entry.dose
            for entry in day_events
            if entry.dose
            and entry.dose.quantity.magnitude > 0
            and entry.dose.quantity.units != "dimensionless"
        ]

        # if no valid doses, skip day
        if not valid_doses:
            continue

        # find first non-zero non-dimensionless dose to use as accumulator
        # FIXME: accumulate by unit type (g, l, x, puffs, etc)
        initdose = valid_doses[0]

        try:
            # inner accumulator (per day)
            amt = Dose(substance, initdose.quantity * 0)
            for entry in day_events:
                if entry.dose and entry.dose.quantity.magnitude > 0:
                    amt += entry.dose
            # add to outer accumulator
            tot_amt += amt
        except Exception as exc:
            # Best effort: a day that cannot be summed is logged and left out
            # of the total instead of aborting the whole report.
            logger.exception(
                f"Unable to sum amounts '{day_events}', '{tot_amt}': {exc}"
            )

    n_days = len(grouped_by_date)
    if ignore_doses_fewer_than and ignore_doses_fewer_than > n_days:
        return

    # TODO: Use Counter

    today = date.today()
    latest = max(grouped_by_date)
    oldest = min(grouped_by_date)

    print(f"{substance}:")
    print(f" - latest: {latest} ({(today - latest).days} days ago)")
    print(f" - oldest: {oldest} ({(today - oldest).days} days ago)")
    print(f" - {n_days} days totalling {tot_amt.amount_with_unit}")
    # Average per *day*: divide by the number of days, not the number of doses.
    print(f" - avg dose/day: {tot_amt/n_days}")

    # For every day, the earliest and latest dose timestamp. The start-of-day
    # shift is removed before comparing and re-applied afterwards.
    firstlast_dose_times: tuple[list[datetime], list[datetime]] = tuple(
        zip(
            *[
                (
                    min(e.timestamp - start_of_day for e in day_events) + start_of_day,
                    max(e.timestamp - start_of_day for e in day_events) + start_of_day,
                )
                for day_events in grouped_by_date.values()
            ]
        )
    )  # type: ignore
    first_dose_times, last_dose_times = firstlast_dose_times

    avg_time_of_first_dose = mean_time([t.time() for t in first_dose_times])
    avg_time_of_last_dose = mean_time([t.time() for t in last_dose_times])
    print(
        f" - avg time of first/last daily dose: {avg_time_of_first_dose}/{avg_time_of_last_dose}"
    )

    try:
        median_dose = statistics.median(e.dose for e in events if e.dose)  # type: ignore
        min_dose = min(e.dose for e in events if e.dose)
        max_dose = max(e.dose for e in events if e.dose)
        print(
            f" - min/median/max dose: {min_dose.amount_with_unit}/{median_dose.amount_with_unit}/{max_dose.amount_with_unit}"
        )
    except (pint.errors.DimensionalityError, AssertionError):
        logger.warning(
            "Couldn't compute min/median/max doses due to inconsistent units"
        )
    grouped_by_roa = igroupby(events, key=lambda e: e.roa)
    print(" - ROAs:")
    # NOTE(review): this orders ROAs by comparing their event lists, not by
    # count — confirm whether sorting by number of doses was intended.
    for roa in sorted(grouped_by_roa, key=lambda v: grouped_by_roa[v]):
        print(f"   - {roa.ljust(10)}  n: {len(grouped_by_roa[roa])}")
462 | 
463 | 
# Period key used to bucket events: (year, month, day). The day component is
# None when events are grouped by month.
TDate = tuple[int, int, int | None]

# Offset added to an event's timestamp before reading its calendar date, so
# that events logged before 04:00 are attributed to the previous day.
day_offset = timedelta(hours=-4)
467 | 
468 | 
def _grouped_by_date(events: list[Event], monthly=True) -> dict[TDate, list[Event]]:
    """Bucket events by period, after shifting timestamps by ``day_offset``.

    Args:
        events: events to group; each must expose a ``timestamp``.
        monthly: when true the key is ``(year, month, None)``, otherwise
            ``(year, month, day)``.

    Returns:
        A mapping from period key to the events in that period, in input
        order. Keys appear in order of first occurrence.
    """
    grouped_by_date: dict[TDate, list[Event]] = defaultdict(list)
    for event in events:
        shifted = event.timestamp + day_offset
        period: TDate = (
            shifted.year,
            shifted.month,
            None if monthly else shifted.day,
        )
        # Append rather than assign: the input is not required to be sorted,
        # and events of the same period may be non-adjacent. Assigning one
        # consecutive run at a time would drop earlier events of that period.
        grouped_by_date[period].append(event)
    return grouped_by_date
481 | 
482 | 
# Maps a period key to a numeric value for that period (a dose count or a
# summed dose magnitude, depending on the producer).
TValueByDate = dict[TDate, float]
484 | 
485 | 
def _dosesum(doses):
    """Add up an iterable of doses with ``+=``; return 0 if it is empty."""
    remaining = iter(list(doses))
    try:
        total = next(remaining)
    except StopIteration:
        # nothing to sum
        return 0
    for item in remaining:
        total += item
    return total
494 | 
495 | 
def _sum_doses(events: list[Event], monthly=True) -> dict[str, TValueByDate]:
    """Sum dose amounts per substance and period.

    Events without a dose are ignored for the sums, but every substance named
    by any event still gets an entry (0 when nothing could be summed) for each
    period. Summed doses are stored as the magnitude of their quantity in base
    units. The per-period sums are also printed to stdout.
    """
    # Collected before dose-less events are dropped.
    known_substances = {ev.substance for ev in events if ev.substance}
    by_period = _grouped_by_date([ev for ev in events if ev.dose], monthly=monthly)

    totals: dict[str, dict[TDate, float]] = defaultdict(
        lambda: defaultdict(float)
    )
    for period, period_events in by_period.items():
        by_substance = igroupby(period_events, key=lambda ev: ev.substance)
        sums = {}
        for name, group in by_substance.items():
            if name:
                sums[name] = _dosesum(ev.dose for ev in group)
        ranked = Counter(sums)

        for name, total in ranked.most_common(20):
            assert name
            print(f" - {total}")

        for name in known_substances:
            total = ranked[name]
            if isinstance(total, Dose):
                totals[name][period] = total.quantity.to_base_units().magnitude  # type: ignore
            else:
                # substance had no summable dose in this period
                totals[name][period] = 0

    return totals
523 | 
524 | 
def _count_doses(
    events: list[Event], one_per_day=True, monthly=True, verbose=False
) -> dict[str, TValueByDate]:
    """Count doses per substance and period.

    With ``one_per_day`` the count is the number of distinct (offset-shifted)
    dates with at least one event; otherwise it is the number of events.
    When ``verbose`` is set, the top 20 counts of each period are printed.
    """
    known_substances = {ev.substance for ev in events if ev.substance}
    by_period = _grouped_by_date(events, monthly=monthly)
    unit = " days" if one_per_day else "x"

    def tally(group):
        # number of distinct days, or plain number of events
        if one_per_day:
            return len({(ev.timestamp + day_offset).date() for ev in group})
        return len(group)

    result: dict[str, dict[TDate, float]] = defaultdict(
        lambda: defaultdict(float)
    )
    for period, period_events in by_period.items():
        by_substance = igroupby(period_events, key=lambda ev: ev.substance)
        counts = Counter(
            {name: tally(group) for name, group in by_substance.items()}
        )

        if verbose:
            print(period)
            for name, n in counts.most_common(20):
                print(f" - {n}{unit} {name}")

        for name in known_substances:
            result[name][period] = counts[name]

    return result
558 | 
559 | 
def _plot_frequency(
    events,
    count=False,
    one_per_day=False,
    any_substance=False,
    daily=False,
    verbose=False,
    figsize: tuple[int, int] | None = None,
):
    """
    Draw a stacked bar chart of use over time, one bar segment per substance
    and one bar per period (day when ``daily``, otherwise month).

    Bar heights are dose counts when ``count`` or ``one_per_day`` is set,
    otherwise summed dose amounts. With ``any_substance`` every event is
    relabelled as the single substance "Any" (the events are modified in place).
    """
    plt.figure(figsize=figsize or None)

    # Keep only dose events (drops journal entries), in sorted order
    dose_events = sorted(e for e in events if e.type == "dose")
    assert dose_events

    if any_substance:
        for ev in dose_events:
            ev.data["substance"] = "Any"

    monthly = not daily
    if count or one_per_day:
        period_counts = _count_doses(
            dose_events, one_per_day=one_per_day, monthly=monthly, verbose=verbose
        )
    else:
        period_counts = _sum_doses(dose_events, monthly=monthly)

    # Every period key seen for any substance, with a missing day mapped to 0
    seen: list[tuple[int, int, int]] = []
    for by_date in period_counts.values():
        for period in by_date.keys():
            seen.append((period[0], period[1], period[2] or 0))

    # Expand to a contiguous range so periods without data still get a slot
    first, last = min(seen), max(seen)
    if daily:
        labels = dayrange(first, last)
        fmt = "%Y-%m-%d"
    else:
        labels = [(m[0], m[1], 1) for m in monthrange(first[:2], last[:2])]
        fmt = "%Y-%m"
    labels_date = [datetime(*t).strftime(fmt) for t in labels]

    stackheight = np.zeros(len(labels))
    for substance, value_by_date in period_counts.items():
        heights = []
        for label in labels:
            # monthly data is keyed by (year, month, None)
            key = label if daily else (*label[:2], None)
            heights.append(value_by_date.get(key, 0))
        # check that heights is not all zeros (indication of indexing error)
        assert any(heights)
        plt.bar(labels_date, heights, label=substance, bottom=stackheight)
        stackheight += np.array(heights)

    plt.xticks(rotation="vertical")
    plt.legend()
    plt.show()
616 | 
617 | 
def _plot_calendar(
    events,
    cmap="YlGn",
    fillcolor="whitesmoke",
    figsize=None,
    one_per_day=True,
    **kwargs,
):
    """
    Draw a calendar heatmap of daily dose counts across all substances.

    All dose events are relabelled as the substance "Any" (in place) so they
    are counted together. ``cmap`` (e.g. "Reds", "YlGn"), ``fillcolor`` and
    ``figsize`` are passed to calplot; extra keyword arguments are passed as
    ``fig_kws``. With ``one_per_day`` the colour scale is capped at 1.
    """
    # Keep only dose events (drops journal entries), in sorted order
    dose_events = sorted(e for e in events if e.type == "dose")
    assert dose_events, "No events found"

    for ev in dose_events:
        ev.data["substance"] = "Any"

    # TODO: use dose or dose equivalents instead of count
    period_counts = _count_doses(dose_events, one_per_day=False, monthly=False)
    assert len(period_counts) == 1

    first_counts = next(iter(period_counts.values()))
    doses = list(first_counts.values())
    # Period keys are (year, month, day); join them into a parseable date
    labels = []
    for name in period_counts:
        for period in period_counts[name].keys():
            labels.append(pd.Timestamp("-".join(map(str, period))))

    series = pd.Series(doses, index=labels)
    unique_mask = ~series.index.duplicated()
    series = series[unique_mask]
    # one entry per calendar day
    series = series.resample("D").sum().asfreq("D")

    vmax = 1 if one_per_day else max(series)
    calplot.calplot(
        series,
        fillcolor=fillcolor,
        cmap=cmap,
        linewidth=1,
        figsize=figsize,
        vmin=0,
        vmax=vmax,
        dropzero=False,
        fig_kws=kwargs,
    )
    plt.show()
662 | 
663 | 
# Run the command-line entry point when this module is executed as a script.
if __name__ == "__main__":
    main()
666 | 


--------------------------------------------------------------------------------