├── .all-contributorsrc
├── .coveragerc
├── .flake8
├── .github
├── ISSUE_TEMPLATE.md
└── workflows
│ ├── check-links.yaml
│ ├── codeql.yml
│ ├── release.yaml
│ └── testing.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CHANGELOG.md
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── ankipandas
├── __init__.py
├── _columns.py
├── ankidf.py
├── collection.py
├── conftest.py
├── data
│ └── anki_fields.csv
├── paths.py
├── raw.py
├── test
│ ├── __init__.py
│ ├── data
│ │ └── few_basic_cards
│ │ │ ├── collection.anki2
│ │ │ └── collection_v1.anki2
│ ├── test_ankidf.py
│ ├── test_collection.py
│ ├── test_paths.py
│ ├── test_raw.py
│ ├── test_regression.py
│ └── util.py
├── util
│ ├── __init__.py
│ ├── checksum.py
│ ├── dataframe.py
│ ├── guid.py
│ ├── log.py
│ ├── misc.py
│ ├── test
│ │ ├── __init__.py
│ │ ├── test_dataframe.py
│ │ ├── test_log.py
│ │ ├── test_misc.py
│ │ └── test_types.py
│ └── types.py
└── version.txt
├── codespell.txt
├── doc
├── Makefile
├── ankidf.rst
├── collection.rst
├── conf.py
├── examples.rst
├── examples
│ ├── col.anki2
│ ├── examples
│ │ ├── leeches_per_deck.py
│ │ ├── repetitions_per_deck.py
│ │ ├── repetitions_per_type.py
│ │ ├── retention_distribution_vs_deck.py
│ │ ├── retention_rate_per_deck.py
│ │ └── reviews_vs_ease.py
│ ├── loader.py
│ └── plots
│ │ ├── leeches_per_deck.png
│ │ ├── repetitions_per_deck.png
│ │ ├── repetitions_per_type.png
│ │ ├── retention_distribution_vs_deck.png
│ │ ├── retention_rate_per_deck.png
│ │ └── reviews_vs_ease.png
├── faq.md
├── index.rst
├── paths.rst
├── projects_with_ap.rst
├── raw.rst
├── requirements.txt
├── troubleshooting.rst
└── util.rst
├── misc
└── logo
│ ├── _logos.svg
│ ├── logo.svg
│ ├── logo_github.png
│ ├── logo_rtd.svg
│ └── logo_social.svg
├── mlc_config.json
├── mypy.ini
├── pyproject.toml
├── requirements.txt
├── setup.cfg
└── setup.py
/.all-contributorsrc:
--------------------------------------------------------------------------------
1 | {
2 | "files": [
3 | "README.md"
4 | ],
5 | "imageSize": 100,
6 | "commit": false,
7 | "contributorsSortAlphabetically": true,
8 | "contributors": [
9 | {
10 | "login": "exc4l",
11 | "name": "exc4l",
12 | "avatar_url": "https://avatars3.githubusercontent.com/u/74188442?v=4",
13 | "profile": "https://github.com/exc4l",
14 | "contributions": [
15 | "bug",
16 | "code"
17 | ]
18 | },
19 | {
20 | "login": "CalculusAce",
21 | "name": "CalculusAce",
22 | "avatar_url": "https://avatars3.githubusercontent.com/u/42630988?v=4",
23 | "profile": "https://github.com/CalculusAce",
24 | "contributions": [
25 | "bug"
26 | ]
27 | },
28 | {
29 | "login": "brownbat",
30 | "name": "Thomas Brownback",
31 | "avatar_url": "https://avatars2.githubusercontent.com/u/26754?v=4",
32 | "profile": "http://thomasbrownback.com/",
33 | "contributions": [
34 | "bug"
35 | ]
36 | },
37 | {
38 | "login": "p4nix",
39 | "name": "p4nix",
40 | "avatar_url": "https://avatars1.githubusercontent.com/u/7038116?v=4",
41 | "profile": "https://github.com/p4nix",
42 | "contributions": [
43 | "bug"
44 | ]
45 | },
46 | {
47 | "login": "eumiro",
48 | "name": "Miroslav Šedivý",
49 | "avatar_url": "https://avatars0.githubusercontent.com/u/6774676?v=4",
50 | "profile": "https://github.com/eumiro",
51 | "contributions": [
52 | "test",
53 | "code"
54 | ]
55 | },
56 | {
57 | "login": "khughitt",
58 | "name": "Keith Hughitt",
59 | "avatar_url": "https://avatars.githubusercontent.com/u/125001?v=4",
60 | "profile": "https://github.com/khughitt",
61 | "contributions": [
62 | "bug"
63 | ]
64 | },
65 | {
66 | "login": "bollwyvl",
67 | "name": "Nicholas Bollweg",
68 | "avatar_url": "https://avatars.githubusercontent.com/u/45380?v=4",
69 | "profile": "https://github.com/bollwyvl",
70 | "contributions": [
71 | "code"
72 | ]
73 | },
74 | {
75 | "login": "eshrh",
76 | "name": "eshrh",
77 | "avatar_url": "https://avatars.githubusercontent.com/u/16175276?v=4",
78 | "profile": "http://esrh.sdf.org",
79 | "contributions": [
80 | "doc"
81 | ]
82 | },
83 | {
84 | "login": "Blocked",
85 | "name": "Blocked",
86 | "avatar_url": "https://avatars.githubusercontent.com/u/4366503?v=4",
87 | "profile": "https://github.com/Blocked",
88 | "contributions": [
89 | "bug"
90 | ]
91 | },
92 | {
93 | "login": "frnsys",
94 | "name": "Francis Tseng",
95 | "avatar_url": "https://avatars.githubusercontent.com/u/1059947?v=4",
96 | "profile": "http://frnsys.com",
97 | "contributions": [
98 | "bug",
99 | "code"
100 | ]
101 | }
102 | ],
103 | "contributorsPerLine": 7,
104 | "projectName": "AnkiPandas",
105 | "projectOwner": "klieret",
106 | "repoType": "github",
107 | "repoHost": "https://github.com",
108 | "skipCi": true,
109 | "commitConvention": "angular"
110 | }
111 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 | setup.py
4 | examples/*
5 | */test/*
6 | test_*
7 | relative_files = True
8 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 80
3 | select = C,E,F,W,B,B950
4 | ignore = E203, E501, W503
5 | per-file-ignores =
6 | doc/examples/examples/*: F821
7 | */__init__.py: F401
8 | exclude =
9 | .git,
10 | __pycache__,
11 | notebooks,
12 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | Instructions
2 |
3 | First off, thanks for helping! :heart:
4 |
5 | To make it easier to come up with a solution, please make sure to include
6 |
7 | * Your operating system
8 | * Please make sure that you set the logging level of ankipandas to debug directly after importing:
9 | `ankipandas.set_debug_log_level()`.
10 | * A minimal working example (MWE) of code that can be used to reproduce your problem (where applicable)
11 | * A clear description of what you expected and what happened instead
12 | * The complete output of all log messages of `AnkiPandas`
13 |
14 | Also note that it might take some time before I can take a look at this.
15 | If I don't reply within a week, please ping me again (e.g. write another comment).
16 | Thanks for your patience!
17 |
18 | You may delete these instructions from your comment.
19 |
--------------------------------------------------------------------------------
/.github/workflows/check-links.yaml:
--------------------------------------------------------------------------------
1 | name: Check Markdown links
2 |
3 | on:
4 | push:
5 | pull_request:
6 | schedule:
7 | - cron: "0 0 1 * *"
8 |
9 | jobs:
10 | markdown-link-check:
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/checkout@master
14 | - uses: gaurav-nelson/github-action-markdown-link-check@v1
15 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | name: "CodeQL"
2 |
3 | on:
4 | push:
5 | branches: [ "master" ]
6 | pull_request:
7 | branches: [ "master" ]
8 | schedule:
9 | - cron: "44 5 * * 5"
10 |
11 | jobs:
12 | analyze:
13 | name: Analyze
14 | runs-on: ubuntu-latest
15 | permissions:
16 | actions: read
17 | contents: read
18 | security-events: write
19 |
20 | strategy:
21 | fail-fast: false
22 | matrix:
23 | language: [ python ]
24 |
25 | steps:
26 | - name: Checkout
27 | uses: actions/checkout@v3
28 |
29 | - name: Initialize CodeQL
30 | uses: github/codeql-action/init@v2
31 | with:
32 | languages: ${{ matrix.language }}
33 | queries: +security-and-quality
34 |
35 | - name: Autobuild
36 | uses: github/codeql-action/autobuild@v2
37 |
38 | - name: Perform CodeQL Analysis
39 | uses: github/codeql-action/analyze@v2
40 | with:
41 | category: "/language:${{ matrix.language }}"
42 |
--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | release:
5 | types: [published]
6 | workflow_dispatch:
7 |
8 | jobs:
9 | deploy:
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - uses: actions/checkout@v3
14 | with:
15 | fetch-depth: 0
16 |
17 | - name: Build sdist and wheel
18 | run: pipx run build
19 |
20 | - uses: actions/upload-artifact@v4
21 | with:
22 | path: dist
23 |
24 | - name: Check products
25 | run: pipx run twine check dist/*
26 |
27 | - uses: pypa/gh-action-pypi-publish@v1.6.1
28 | if: github.event_name == 'release' && github.event.action == 'published'
29 | with:
30 | # Remember to generate this and set it in "GitHub Secrets"
31 | password: ${{ secrets.pypi_password }}
32 | verbose: true
33 |
--------------------------------------------------------------------------------
/.github/workflows/testing.yaml:
--------------------------------------------------------------------------------
1 | name: testing
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 |
9 | steps:
10 | - uses: actions/checkout@v2
11 | - name: Set up Python 3.8
12 | uses: actions/setup-python@v2
13 | with:
14 | python-version: 3.8
15 | - name: Install prerequisites
16 | run: |
17 | python3 -m pip install -U pip wheel setuptools
18 | - name: Build distribution
19 | run: |
20 | python3 setup.py sdist bdist_wheel
21 | cd dist && sha256sum * | tee SHA256SUMS
22 | - name: Install lint/test dependencies
23 | run: |
24 | python3 -m pip install flake8 pytest pytest-cover pytest-subtests
25 | - name: Lint with flake8
26 | run: |
27 | # stop the build if there are Python syntax errors or undefined names
28 | flake8 . --count --select=E9,F63,F7,F82 --ignore F821 --show-source --statistics
29 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
30 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
31 | - name: Install test dependencies
32 | run: |
33 | python3 -m pip install -e .
34 | - name: Test with pytest
35 | run: |
36 | pytest
37 | - uses: actions/upload-artifact@v4
38 | with:
39 | name: ankipandas dist ${{ github.run_number }}
40 | path: ./dist
41 | - name: Coveralls
42 | uses: AndreMiras/coveralls-python-action@develop
43 |
44 | test:
45 | needs: [build]
46 | runs-on: ${{ matrix.os }}-latest
47 | strategy:
48 | matrix:
49 | os: [ubuntu, windows, macos]
50 | python-version: ['3.8', '3.9', '3.10']
51 | include:
52 | - python-version: '3.8'
53 | artifact: ankipandas-*.whl
54 | - python-version: '3.9'
55 | artifact: ankipandas-*.tar.gz
56 | - python-version: '3.10'
57 | artifact: ankipandas-*.tar.gz
58 |
59 | steps:
60 | - uses: actions/checkout@v2
61 | - uses: actions/download-artifact@v4.1.7
62 | with:
63 | name: ankipandas dist ${{ github.run_number }}
64 | path: ./dist
65 | - name: Set up Python ${{ matrix.python-version }}
66 | uses: actions/setup-python@v2
67 | with:
68 | python-version: ${{ matrix.python-version }}
69 | - name: Install prerequisites
70 | run: |
71 | python3 -m pip install -U pip wheel
72 | - name: Get artifact path
73 | id: artifact
74 | shell: bash -l {0}
75 | run: |
76 | cd dist
77 | echo "::set-output name=path::$(ls ${{ matrix.artifact }})"
78 | - name: Install package
79 | run: |
80 | cd dist
81 | python3 -m pip install ${{ steps.artifact.outputs.path }}
82 | - name: Smoke test
83 | run: |
84 | cd dist
85 | python3 -m pip list
86 | python3 -m pip check
87 | python3 -c "import ankipandas"
88 | - name: Install test dependencies
89 | run: |
90 | python3 -m pip install pytest pytest-cover pytest-subtests
91 | - name: Test with pytest
92 | run: |
93 | cd dist
94 | pytest --pyargs ankipandas --cov ankipandas --cov-report term-missing:skip-covered
95 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | todo.txt
2 | *.swp
3 | .idea/**
4 | doc/_build/**
5 | # gets copied:
6 | doc/readme.md
7 | .~*
8 |
9 | examples/**
10 | examples/jupyter_notebooks/tests.ipynb
11 |
12 | # Created by https://www.gitignore.io/api/python
13 | # Edit at https://www.gitignore.io/?templates=python
14 |
15 | *.ipynb
16 |
17 | ### Python ###
18 | # Byte-compiled / optimized / DLL files
19 | __pycache__/
20 | *.py[cod]
21 | *$py.class
22 |
23 | # C extensions
24 | *.so
25 |
26 | # Distribution / packaging
27 | .Python
28 | build/
29 | develop-eggs/
30 | dist/
31 | downloads/
32 | eggs/
33 | .eggs/
34 | lib/
35 | lib64/
36 | parts/
37 | sdist/
38 | var/
39 | wheels/
40 | pip-wheel-metadata/
41 | share/python-wheels/
42 | *.egg-info/
43 | .installed.cfg
44 | *.egg
45 | MANIFEST
46 |
47 | # PyInstaller
48 | # Usually these files are written by a python script from a template
49 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
50 | *.manifest
51 | *.spec
52 |
53 | # Installer logs
54 | pip-log.txt
55 | pip-delete-this-directory.txt
56 |
57 | # Unit test / coverage reports
58 | htmlcov/
59 | .tox/
60 | .nox/
61 | .coverage
62 | .coverage.*
63 | .cache
64 | nosetests.xml
65 | coverage.xml
66 | *.cover
67 | .hypothesis/
68 | .pytest_cache/
69 |
70 | # Translations
71 | *.mo
72 | *.pot
73 |
74 | # Django stuff:
75 | *.log
76 | local_settings.py
77 | db.sqlite3
78 |
79 | # Flask stuff:
80 | instance/
81 | .webassets-cache
82 |
83 | # Scrapy stuff:
84 | .scrapy
85 |
86 | # Sphinx documentation
87 | docs/_build/
88 |
89 | # PyBuilder
90 | target/
91 |
92 | # Jupyter Notebook
93 | .ipynb_checkpoints
94 |
95 | # IPython
96 | profile_default/
97 | ipython_config.py
98 |
99 | # pyenv
100 | .python-version
101 |
102 | # pipenv
103 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
104 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
105 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not
106 | # install all needed dependencies.
107 | #Pipfile.lock
108 |
109 | # celery beat schedule file
110 | celerybeat-schedule
111 |
112 | # SageMath parsed files
113 | *.sage.py
114 |
115 | # Environments
116 | .env
117 | .venv
118 | env/
119 | venv/
120 | ENV/
121 | env.bak/
122 | venv.bak/
123 |
124 | # Spyder project settings
125 | .spyderproject
126 | .spyproject
127 |
128 | # Rope project settings
129 | .ropeproject
130 |
131 | # mkdocs documentation
132 | /site
133 |
134 | # mypy
135 | .mypy_cache/
136 | .dmypy.json
137 | dmypy.json
138 |
139 | # Pyre type checker
140 | .pyre/
141 |
142 | # End of https://www.gitignore.io/api/python
143 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/psf/black
3 | rev: 24.10.0
4 | hooks:
5 | - id: black
6 |
7 | - repo: https://github.com/pre-commit/pre-commit-hooks
8 | rev: v5.0.0
9 | hooks:
10 | - id: check-added-large-files
11 | - id: check-case-conflict
12 | - id: check-merge-conflict
13 | - id: detect-private-key
14 | - id: end-of-file-fixer
15 | - id: trailing-whitespace
16 | - repo: https://github.com/pycqa/isort
17 | rev: 5.13.2
18 | hooks:
19 | - id: isort
20 | name: isort (python)
21 | args:
22 | [
23 | "--profile",
24 | "black",
25 | "-a",
26 | "from __future__ import annotations",
27 | "--append-only",
28 | ]
29 |
30 | - repo: https://github.com/PyCQA/flake8
31 | rev: "7.1.1" # pick a git hash / tag to point to
32 | hooks:
33 | - id: flake8
34 | additional_dependencies: ["flake8-bugbear"]
35 | - repo: https://github.com/pre-commit/mirrors-mypy
36 | rev: "v1.13.0" # Use the sha / tag you want to point at
37 | hooks:
38 | - id: mypy
39 | exclude: "conftest.py|^doc/examples/examples/|^doc/conf.py"
40 | - repo: https://github.com/codespell-project/codespell
41 | rev: "v2.3.0"
42 | hooks:
43 | - id: codespell
44 | args: ["-I", "codespell.txt"]
45 | - repo: https://github.com/asottile/pyupgrade
46 | rev: v3.19.0
47 | hooks:
48 | - id: pyupgrade
49 | args: ["--py37-plus"]
50 |
51 |
52 |
53 | ci:
54 | autoupdate_schedule: monthly
55 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: ubuntu-22.04
5 | tools:
6 | python: "3.10"
7 |
8 | sphinx:
9 | configuration: doc/conf.py
10 |
11 | python:
12 | install:
13 | - requirements: doc/requirements.txt
14 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
4 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
5 |
6 | ## 0.3.15 -- 2023-10-11
7 |
8 | ### Removed
9 |
10 | - `Collection.write` now raises a `NotImplementedError` because
11 | https://github.com/klieret/AnkiPandas/issues/137 has so far not been
12 | resolved and has caused numerous problems.
13 |
14 | ## 0.3.14 -- 2023-05-14
15 |
16 | ### Fixed
17 |
18 | - Ensured compatibility with pandas 2.0
19 |
20 | ## 0.3.13 -- 2023-04-05
21 |
22 | ### Fixed
23 |
24 | - Require pandas < 2.0 for now because of API changes in pandas 2.0.
25 | See #143
26 |
27 | ## 0.3.12 -- 2023-01-01
28 |
29 | ### Fixed
30 |
31 | - Fix future warning about `Index.__and__`
32 |
33 | ## 0.3.11 -- 2022-12-29
34 |
35 | ### Removed
36 |
37 | - Python 3.6 has reached its end of life and is no longer supported. This is to
38 | ensure that we can keep the code base modern and developers happy.
39 |
40 | ### Fixed
41 |
42 | - When updating cards or notes, certain SQL search indices that are usually
43 | automatically created by Anki could be left missing. While these do not
44 | change any of the information in the database and can be created trivially,
45 | this caused Anki to refuse to load the database.
46 |
47 | ### Documentation and developer happiness
48 |
49 | - Various improvements to the documentation
50 | - Work on modernizing the code base
51 |
52 | ## 0.3.10 -- 2021-05-07
53 |
54 | ### Fixed
55 |
56 | - The SQLite connection to the database is now always closed right away by the Collection.
57 | This solves issues where the connection was kept open even though all collection and
58 | AnkiDataFrame objects were deleted (there is additional garbage collection trouble).
59 | Read more in [issue 75](https://github.com/klieret/AnkiPandas/issues/75)
60 |
61 | ## 0.3.9 -- 2020-12-17
62 |
63 | ### Deprecated
64 |
65 | - Future releases will not support python 3.5 anymore. Added warning for users still on
66 | python 3.5.
67 |
68 | ## 0.3.8 -- 2020-12-05
69 |
70 | ### Fixed
71 |
72 | - Setup problems with editable install as described [here](https://github.com/pypa/pip/issues/7953)
73 | - Compatibility issues with building ankipandas on windows machines (windows is not
74 | using utf8 by default which often results in errors, see
75 | [here](https://discuss.python.org/t/pep-597-enable-utf-8-mode-by-default-on-windows/3122))
76 | - Issues with max search depth for database search
77 | - AttributeError when calling `merge_notes` with `inplace=True`. [Issue #51](https://github.com/klieret/AnkiPandas/issues/51)
78 | - Default search paths might not have been working because the user name was not inserted properly
79 | - Properly tell anki that things were changed and that it should sync its tables
80 |
81 | ### Changed
82 |
83 | - Improved database search on windows machines
84 | - If no changes are detected in the different tables, the database will not be overwritten
85 |
86 | ## 0.3.7 -- 2020-11-28
87 |
88 | ### Fixed
89 |
90 | - `merge_cards` and `merge_notes` didn't update metadata of return value, resulting in errors like
91 | `Unknown value of _df_format`. Issue #42
92 | - `force` values weren't passed on, resulting in AnkiPandas refusing to do anything
93 | when writing out
94 | - On Windows the int size is 32 bit even on 64 bit computers, resulting in issues with
95 | large values of ids. Issue #41
96 |
97 |
98 | ## 0.3.6 - 2020-08-26
99 |
100 | ### Fixed
101 |
102 | - Keep support for python 3.5
103 |
104 | ## 0.3.5 - 2020-08-26
105 |
106 | ### Fixed
107 |
108 | - Support for new anki versions ([#38](https://github.com/klieret/AnkiPandas/issues/38))
109 |
110 | ## 0.3.4 - 2020-07-09
111 |
112 | ### Fixed
113 |
114 | - JSONDecodeError upon loading database. Also added additional debugging output in case something like this happens again.
115 |
116 | ## 0.3.3 - 2020-04-21
117 |
118 | ### Fixed
119 |
120 | - Compatibility with new pandas versions
121 |
122 | ## 0.3.0 - 2019-06-02
123 |
124 | ### Added
125 |
126 | - Add new notes and cards
127 |
128 | ### Changed
129 |
130 | - Cards/notes/reviews tables are now initialized from a central ``Collection`` object
131 |
132 | ### Fixed
133 |
134 | - ``was_modified``, ``was_added`` break when user added additional columns to dataframe
135 | - Correctly set ``mod`` (modification timestamp) and ``usn`` (update sequence number) of whole database after updates
136 |
137 | ## 0.2.1 - 2019-05-17
138 |
139 | ### Fixed
140 |
141 | - Merging of tables failed with some pandas versions
142 |
143 | ## 0.2.0 - 2019-05-07
144 |
145 | ### Added
146 |
147 | - Modify tables and write them back into the database.
148 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2019 Kilian Lieret
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include ankipandas/version.txt
2 | include requirements.txt
3 | include CHANGELOG.md
4 | include README.md
5 | include LICENSE.txt
6 | recursive-include ankipandas/test/data/few_basic_cards *.anki2
7 | recursive-include ankipandas/data *.csv
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | 
4 |
5 | Analyze and manipulate your Anki collection using pandas!
6 |
7 |
8 |
9 |
10 | [](https://ankipandas.readthedocs.io/) [](https://matrix.to/#/#AnkiPandas_community:gitter.im)
11 | [](https://github.com/klieret/ankipandas/blob/master/LICENSE.txt)
12 | [](https://git-scm.com/book/en/v2/GitHub-Contributing-to-a-Project)
13 |
14 |
15 |
16 |
17 | [](https://results.pre-commit.ci/latest/github/klieret/AnkiPandas/master)
18 | [](https://github.com/klieret/AnkiPandas/actions) [](https://coveralls.io/github/klieret/AnkiPandas?branch=master)
19 | [](https://github.com/klieret/AnkiPandas/actions/workflows/codeql.yml)
20 | [](https://gitmoji.dev) [](https://github.com/python/black)
21 | [](https://pypi.org/project/ankipandas/)
22 |
23 |
24 |
25 |
26 | ## 📝 Description
27 |
28 | > **Note**
29 | > This package needs a new maintainer, as I currently do not have enough time to continue development
30 | > of this package. Writing modifications back into the Anki database is currently disabled,
31 | > in particular because of issue [#137](https://github.com/klieret/AnkiPandas/issues/137).
32 | > Please reach out to me if you are interested in getting involved!
33 |
34 | [Anki](https://apps.ankiweb.net/) is one of the most popular flashcard
 35 | systems for spaced repetition learning,
36 | [pandas](https://pandas.pydata.org/) is the most popular python package
37 | for data analysis and manipulation. So what could be better than to
38 | bring both together?
39 |
40 | With `AnkiPandas` you can use `pandas` to easily analyze or manipulate
41 | your Anki flashcards.
42 |
43 | **Features**:
44 |
45 | - **Select**: Easily select arbitrary subsets of your cards, notes or
46 | reviews using `pandas` ([one of many
47 | introductions](https://medium.com/dunder-data/6fcd0170be9c),
48 | [official
49 | documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html))
50 | - **Visualize**: Use pandas\' powerful [built in
51 | tools](https://pandas.pydata.org/pandas-docs/stable/user_guide/visualization.html)
52 | or switch to the even more versatile
53 | [seaborn](https://seaborn.pydata.org/) (statistical analysis) or
54 | [matplotlib](https://matplotlib.org/) libraries
55 | - **Manipulate**: Apply fast bulk operations to the table (e.g. add
56 | tags, change decks, set field contents, suspend cards, \...) or
57 | iterate over the table and perform these manipulations step by step.
58 | **⚠️ This functionality is currently disabled until [#137](https://github.com/klieret/AnkiPandas/issues/137) has been resolved! ⚠️**
59 | - **Import and Export**: Pandas can export to (and import from) csv,
60 | MS Excel, HTML, JSON, \... ([io
61 | documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html))
62 |
63 | **Pros**:
64 |
65 | - **Easy installation**: Install via python package manager
66 | (independent of your Anki installation)
67 | - **Simple**: Just one line of code to get started
68 | - **Convenient**: Bring together information about
69 | [cards](https://apps.ankiweb.net/docs/manual.html#cards),
70 | [notes](https://apps.ankiweb.net/docs/manual.html#notes-&-fields),
71 | [models](https://apps.ankiweb.net/docs/manual.html#note-types),
72 | [decks](https://apps.ankiweb.net/docs/manual.html#decks) and more in
73 | just one table!
74 | - **Fully documented**: Documentation on [readthedocs](https://ankipandas.readthedocs.io/)
75 | - **Well tested**: More than 100 unit tests to keep everything in
76 | check
77 |
78 | Alternatives: If your main goal is to add new cards, models and more,
79 | you can also take a look at the
80 | [genanki](https://github.com/kerrickstaley/genanki) project.
81 |
82 | ## 📦 Installation
83 |
84 | `AnkiPandas` is available as [pypi
85 | package](https://pypi.org/project/ankipandas/) and can be installed or
 86 | upgraded with the [python package
87 | manager](https://pip.pypa.io/en/stable/):
88 |
89 | ```sh
90 | pip3 install --user --upgrade ankipandas
91 | ```
92 |
93 | ### Development installation
94 |
95 | For the latest development version you can also work from a cloned
96 | version of this repository:
97 |
98 | ```sh
99 | git clone https://github.com/klieret/ankipandas/
100 | cd ankipandas
101 | pip3 install --user --upgrade --editable .
102 | ```
103 |
104 | If you want to help develop this package further, please also install the
105 | [pre-commit](https://pre-commit.ci/) hooks and use [gitmoji](https://gitmoji.dev/):
106 |
107 | ```sh
108 | pre-commit install
109 | gitmoji -i
110 | ```
111 |
112 | ## 🔥 Let's get started!
113 |
114 | Starting up is as easy as this:
115 |
116 | ```python
117 | from ankipandas import Collection
118 |
119 | col = Collection()
120 | ```
121 |
 122 | And `col.notes` will be a dataframe containing all notes, with additional
123 | methods that make many things easy. Similarly, you can access cards or
124 | reviews using `col.cards` or `col.revs`.
125 |
126 | If called without any argument `Collection()` tries to find your Anki
127 | database by itself. However this might take some time. To make it
128 | easier, simply supply (part of) the path to the database and (if you
129 | have more than one user) your Anki user name, e.g.
130 | `Collection(".local/share/Anki2/", user="User 1")` on many Linux
131 | installations.
132 |
133 | To get information about the interpretation of each column, use
134 | `print(col.notes.help_cols())`.
135 |
136 | Take a look at the [documentation](https://ankipandas.readthedocs.io/)
 137 | to find out more about the available methods!
138 |
139 | Some basic examples:
140 |
141 | ## 📈 Analysis
142 |
143 | **More examples**: [Analysis
144 | documentation](https://ankipandas.readthedocs.io/en/latest/examples.html),
145 | [projects that use `AnkiPandas`](https://ankipandas.readthedocs.io/en/latest/projects_with_ap.html).
146 |
147 | Show a histogram of the number of reviews (repetitions) of each card for
148 | all decks:
149 |
150 | ```python
151 | col.cards.hist(column="creps", by="cdeck")
152 | ```
153 |
154 | Show the number of leeches per deck as pie chart:
155 |
156 | ```python
157 | cards = col.cards.merge_notes()
158 | selection = cards[cards.has_tag("leech")]
159 | selection["cdeck"].value_counts().plot.pie()
160 | ```
161 |
162 | Find all notes of model `MnemoticModel` with empty `Mnemotic` field:
163 |
164 | ```python
165 | notes = col.notes.fields_as_columns()
166 | notes.query("model=='MnemoticModel' and 'Mnemotic'==''")
167 | ```
168 |
169 | ## 🛠️ Manipulations
170 |
171 | > **Warning**
172 | > Writing the database has currently been disabled until
173 | > [#137](https://github.com/klieret/AnkiPandas/issues/137) has been resolved.
174 | > Help is much appreciated!
175 |
176 | > **Warning**
177 | > **Please be careful and test this well!**
178 | > Ankipandas will create a backup of your database before writing, so you can always restore the previous state. Please make sure that everything is working before continuing to use Anki normally!
179 |
180 | Add the `difficult-japanese` and `marked` tag to all notes that contain
181 | the tags `Japanese` and `leech`:
182 |
183 | ```python
184 | notes = col.notes
185 | selection = notes[notes.has_tags(["Japanese", "leech"])]
186 | selection = selection.add_tag(["difficult-japanese", "marked"])
187 | col.notes.update(selection)
188 | col.write(modify=True) # Overwrites your database after creating a backup!
189 | ```
190 |
191 | Set the `language` field to `English` for all notes of model
192 | `LanguageModel` that are tagged with `English`:
193 |
194 | ```python
195 | notes = col.notes
196 | selection = notes[notes.has_tag(["English"])].query("model=='LanguageModel'").copy()
197 | selection.fields_as_columns(inplace=True)
198 | selection["language"] = "English"
199 | col.notes.update(selection)
200 | col.write(modify=True)
201 | ```
202 |
203 | Move all cards tagged `leech` to the deck `Leeches Only`:
204 |
205 | ```python
206 | cards = col.cards
207 | selection = cards[cards.has_tag("leech")]
208 | selection["cdeck"] = "Leeches Only"
209 | col.cards.update(selection)
210 | col.write(modify=True)
211 | ```
212 |
213 | ## 🐞 Troubleshooting
214 |
215 | See the [troubleshooting section in the
216 | documentation](https://ankipandas.readthedocs.io/en/latest/troubleshooting.html).
217 |
218 | ## 💖 Contributing
219 |
220 | Your help is greatly appreciated! Suggestions, bug reports and feature
221 | requests are best opened as [github
222 | issues](https://github.com/klieret/ankipandas/issues). You could also
223 | first discuss in the [gitter
224 | community](https://matrix.to/#/#AnkiPandas_community:gitter.im). If you want to code
225 | something yourself, you are very welcome to submit a [pull
226 | request](https://github.com/klieret/AnkiPandas/pulls)!
227 |
228 | Bug reports and pull requests are credited with the help of the [allcontributors bot](https://allcontributors.org/).
229 |
230 | ## 📃 License & Disclaimer
231 |
232 | This software is licensed under the [MIT
233 | license](https://github.com/klieret/ankipandas/blob/master/LICENSE.txt)
234 | and (despite best testing efforts) comes **without any warranty**. The
235 | logo is inspired by the [Anki
236 | logo](https://github.com/ankitects/anki/blob/main/qt/aqt/data/web/imgs/anki-logo-thin.png)
237 | ([license](https://github.com/ankitects/anki/blob/main/LICENSE)) and
238 | the logo of the pandas package
239 | ([license2](https://github.com/pandas-dev/pandas/blob/main/LICENSE)).
240 | This library and its author(s) are not affiliated/associated with the
241 | main Anki or pandas project in any way.
242 |
243 | ## ✨ Contributors
244 |
245 | Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
246 |
247 |
248 |
249 |
250 |
268 |
269 |
270 |
271 |
272 |
273 |
274 | This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
275 |
--------------------------------------------------------------------------------
/ankipandas/__init__.py:
--------------------------------------------------------------------------------
1 | # ours
2 | from __future__ import annotations
3 |
4 | import ankipandas.raw
5 | import ankipandas.util
6 | from ankipandas.ankidf import AnkiDataFrame
7 | from ankipandas.collection import Collection
8 | from ankipandas.paths import db_path_input, find_db
9 | from ankipandas.util.log import log, set_debug_log_level, set_log_level
10 |
--------------------------------------------------------------------------------
/ankipandas/_columns.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import copy
5 | from pathlib import Path
6 | from typing import Any
7 |
8 | import numpy as np
9 |
10 | # 3rd
11 | import pandas as pd
12 |
13 | # ours
14 | from ankipandas.util.misc import invert_dict
15 |
# todo: Docstrings, cleanup


#: Maps our table names to the table names used in the Anki database.
#: Only "revs" differs: Anki stores reviews in a table called "revlog".
tables_ours2anki = {"revs": "revlog", "cards": "cards", "notes": "notes"}
#: Inverse mapping: Anki table name -> our table name
tables_anki2ours = invert_dict(tables_ours2anki)

# The CSV shipped with the package describes every column: our name
# ("Column"), Anki's name ("AnkiColumn"), table, description, whether it is a
# native Anki column ("Native") and whether it is shown by default ("Default").
fields_file = Path(__file__).parent / "data" / "anki_fields.csv"
fields_df = pd.read_csv(fields_file)

#: Maps table type to name of the index. E.g. the index of the notes is called
#: nid.
table2index = {"cards": "cid", "notes": "nid", "revs": "rid"}

our_tables = sorted(tables_ours2anki)
#: Default columns (our naming) per table, as declared in anki_fields.csv
our_columns = {
    table: sorted(
        fields_df[(fields_df["Table"] == table) & fields_df["Default"]][
            "Column"
        ].unique()
    )
    for table in our_tables
}
# Remove indices: they become the DataFrame index rather than a column
for table, columns in our_columns.items():
    columns.remove(table2index[table])

# hard code this here, because order is important
anki_columns = {
    "cards": [
        "id",
        "nid",
        "did",
        "ord",
        "mod",
        "usn",
        "type",
        "queue",
        "due",
        "ivl",
        "factor",
        "reps",
        "lapses",
        "left",
        "odue",
        "odid",
        "flags",
        "data",
    ],
    "notes": [
        "id",
        "guid",
        "mid",
        "mod",
        "usn",
        "tags",
        "flds",
        "sfld",
        "csum",
        "flags",
        "data",
    ],
    "revs": [
        "id",
        "cid",
        "usn",
        "ease",
        "ivl",
        "lastIvl",
        "factor",
        "time",
        "type",
    ],
}

#: Per table: our column name -> Anki column name, restricted to columns that
#: exist natively in the Anki database ("Native" flag in anki_fields.csv).
columns_ours2anki = {
    table: dict(
        zip(
            fields_df[(fields_df["Table"] == table) & fields_df["Native"]][
                "Column"
            ],
            fields_df[(fields_df["Table"] == table) & fields_df["Native"]][
                "AnkiColumn"
            ],
        )
    )
    for table in our_tables
}


#: Inverse of :data:`columns_ours2anki` per table
columns_anki2ours = {
    table: invert_dict(columns_ours2anki[table]) for table in our_tables
}

#: Translations of Anki's raw integer enum values to human-readable strings,
#: keyed by table and then by our column name.
value_maps = {
    "cards": {
        "cqueue": {
            -3: "sched buried",
            -2: "user buried",
            -1: "suspended",
            0: "new",
            1: "learning",
            2: "due",
            3: "in learning",
        },
        "ctype": {0: "learning", 1: "review", 2: "relearn", 3: "cram"},
    },
    "revs": {"rtype": {0: "learning", 1: "review", 2: "relearn", 3: "cram"}},
}

# Currently no dtype conversions are applied when loading; kept as
# placeholders for table -> {column: dtype} mappings.
dtype_casts: dict[str, dict[str, Any]] = {
    "notes": {},
    "cards": {},
    "revs": {},
}

# todo: more precise?
# Placeholder for dtype conversions applied when writing back.
dtype_casts_back: dict[str, dict[str, Any]] = {
    "notes": {},
    "cards": {},
    "revs": {},
}

# Avoiding problem with ints to floats such as here:
# https://github.com/pandas-dev/pandas/issues/4094
# Also be careful with platform dependent length of the int type, else this
# causes this error https://stackoverflow.com/questions/38314118/
# on Windows machines as stated by CalculusAce in
# https://github.com/klieret/AnkiPandas/issues/41
dtype_casts2 = {
    "cards": {
        "cord": np.int64,
        "cmod": np.int64,
        "cusn": np.int64,
        "cdue": np.int64,
        "civl": np.int64,
        "cfactor": np.int64,
        "creps": np.int64,
        "clapses": np.int64,
        "cleft": np.int64,
        "codue": np.int64,
    },
    "notes": {"nmod": np.int64, "nusn": np.int64},
    "revs": {
        "cid": np.int64,
        "rusn": np.int64,
        "rease": np.int64,
        "ivl": np.int64,
        "lastivl": np.int64,
        "rfactor": np.int64,
        "rtime": np.int64,
    },
}
# Merged view over all three tables (deep copy so dtype_casts2 stays pristine)
dtype_casts_all = copy.deepcopy(dtype_casts2["cards"])
dtype_casts_all.update(dtype_casts2["notes"])
dtype_casts_all.update(dtype_casts2["revs"])
171 |
--------------------------------------------------------------------------------
/ankipandas/collection.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import sqlite3
5 | import time
6 | from contextlib import closing
7 | from pathlib import Path, PurePath
8 | from typing import Any
9 |
10 | # ours
11 | import ankipandas.paths
12 | import ankipandas.raw as raw
13 | from ankipandas.ankidf import AnkiDataFrame
14 | from ankipandas.util.log import log
15 |
16 |
class Collection:
    def __init__(self, path=None, user=None):
        """Initialize :class:`~ankipandas.collection.Collection` object.

        Args:
            path: (Search) path to database. See
                :py:func:`~ankipandas.paths.db_path_input` for more
                information.
            user: Anki user name. See
                :py:func:`~ankipandas.paths.db_path_input` for more
                information.

        Examples:

            .. code-block:: python

                from ankipandas import Collection

                # Let ankipandas find the db for you
                col = Collection()

                # Let ankipandas find the db for this user (important if you have
                # more than one user account in Anki)
                col = Collection(user="User 1")

                # Specify full path to Anki's database
                col = Collection("/full/path/to/collection.anki2")

                # Specify partial path to Anki's database and specify user
                col = Collection("/partial/path/to/collection", user="User 1")

        """
        path = ankipandas.paths.db_path_input(path, user=user)

        #: Path to currently loaded database
        self._path: Path = path

        #: Should be accessed with _get_item!
        self.__items: dict[str, AnkiDataFrame | None] = {
            "notes": None,
            "cards": None,
            "revs": None,
        }

        #: Should be accessed with _get_original_item!
        self.__original_items: dict[str, AnkiDataFrame | None] = {
            "notes": None,
            "cards": None,
            "revs": None,
        }

    @property
    def path(self) -> Path:
        """Path to currently loaded database"""
        return self._path

    @property
    def db(self) -> sqlite3.Connection:
        """Opened Anki database. Make sure to call `db.close()` after you're
        done. Better still, use `contextlib.closing`.

        Note that every access opens a *new* connection.
        """
        log.debug(f"Opening Db from {self._path}")
        return raw.load_db(self._path)

    def _get_original_item(self, item):
        """Return (and lazily load) the pristine table as read from the
        database. Used as reference point to detect changes."""
        r = self.__original_items[item]
        if r is None:
            if item in ["notes", "revs", "cards"]:
                r = AnkiDataFrame.init_with_table(self, item)
                self.__original_items[item] = r
        return r

    def _get_item(self, item):
        """Return (and lazily create) the user-facing copy of a table."""
        r = self.__items[item]
        if r is None:
            # Deep copy, so that the original stays untouched for comparison
            r = self._get_original_item(item).copy(True)
            self.__items[item] = r
        return r

    @property
    def notes(self) -> AnkiDataFrame:
        """Notes as :class:`ankipandas.ankidf.AnkiDataFrame`."""
        return self._get_item("notes")

    @notes.setter
    def notes(self, value):
        self.__items["notes"] = value

    @property
    def cards(self) -> AnkiDataFrame:
        """Cards as :class:`ankipandas.ankidf.AnkiDataFrame`."""
        return self._get_item("cards")

    @cards.setter
    def cards(self, value):
        self.__items["cards"] = value

    @property
    def revs(self) -> AnkiDataFrame:
        """Reviews as :class:`ankipandas.ankidf.AnkiDataFrame`."""
        return self._get_item("revs")

    @revs.setter
    def revs(self, value):
        self.__items["revs"] = value

    def empty_notes(self):
        """Similar :class:`ankipandas.ankidf.AnkiDataFrame`
        to :attr:`notes`, but without any rows."""
        return AnkiDataFrame.init_with_table(self, "notes", empty=True)

    def empty_cards(self):
        """Similar :class:`ankipandas.ankidf.AnkiDataFrame`
        to :attr:`cards`, but without any rows."""
        return AnkiDataFrame.init_with_table(self, "cards", empty=True)

    def empty_revs(self):
        """Similar :class:`ankipandas.ankidf.AnkiDataFrame`
        to :attr:`revs`, but without any rows."""
        return AnkiDataFrame.init_with_table(self, "revs", empty=True)

    def summarize_changes(self, output="print") -> dict[str, dict] | None:
        """Summarize changes that were made with respect to the table
        as loaded from the database.
        If notes/cards/etc. were not loaded at all (and hence also definitely
        not modified), they do not appear in the output.

        Args:
            output: Output mode: 'print' (default: print)
                or 'dict' (return as dictionary of dictionaries of format
                ``{<type (cards/notes/...)>: {<change>: <number>}}``.

        Returns:
            None or dictionary of dictionaries

        Raises:
            :class:`ValueError` if ``output`` is neither 'print' nor 'dict'.
        """
        if output == "dict":
            as_dict = {}
            for key, value in self.__items.items():
                if value is not None:
                    changes = value.summarize_changes(output="dict")
                    as_dict[key] = changes  # type: ignore
            return as_dict  # type: ignore
        elif output == "print":
            for key, value in self.__items.items():
                if value is not None:
                    print(f"======== {key} ========")
                    value.summarize_changes()
            return None  # explicit for mypy
        else:
            raise ValueError(f"Invalid output setting: {output}")

    def _prepare_write_data(
        self, modify=False, add=False, delete=False
    ) -> dict[str, Any]:
        """Collect the raw tables that need to be written back.

        Cross-checks the detected changes against the ``modify``/``add``/
        ``delete`` switches and raises :class:`ValueError` on any change that
        was not explicitly allowed. Unchanged or unloaded tables are skipped.

        Returns:
            Dictionary ``{table name: {"raw": <raw table>, "mode": <mode>}}``
        """
        prepared = {}
        for key, value in self.__items.items():
            if value is None:
                log.debug("Write: Skipping %s, because it's None.", key)
                continue
            if key in ["notes", "cards", "revs"]:
                ndeleted = len(value.was_deleted())
                nmodified = sum(value.was_modified(na=False))
                nadded = sum(value.was_added())

                if not delete and ndeleted:
                    raise ValueError(
                        f"You specified delete=False, but {ndeleted} rows of "
                        f"item {key} would be deleted."
                    )
                if not modify and nmodified:
                    raise ValueError(
                        f"You specified modify=False, but {nmodified} rows of "
                        f"item {key} would be modified."
                    )
                # fix: previous message wrongly said "would be modified"
                if not add and nadded:
                    raise ValueError(
                        f"You specified add=False, but {nadded} rows of "
                        f"item {key} would be added."
                    )

                if not ndeleted and not nmodified and not nadded:
                    log.debug(
                        "Skipping table %s for writing, because nothing "
                        "seemed to have changed",
                        key,
                    )
                    continue

                # Choose the least invasive write mode that covers the
                # allowed operations; fall back to a full table replace.
                mode = "replace"
                if modify and not add and not delete:
                    mode = "update"
                if add and not modify and not delete:
                    mode = "append"
                log.debug("Will update table %s with mode %s", key, mode)
                value.check_table_integrity()
                raw_table = value.raw()
                prepared[key] = {"raw": raw_table, "mode": mode}

        return prepared

    def _get_and_update_info(self) -> dict[str, Any]:
        """Fetch the database info blob and stamp it with a new modification
        time and ``usn = -1`` (signals that a sync is needed).

        Returns:
            The updated info dictionary.
        """
        # The `db` property opens a NEW connection on every access, so all
        # queries must happen inside this single `closing()` block; the
        # version check used to run on the already-closed connection.
        with closing(self.db) as db:
            info = raw.get_info(db)
            db_version = raw.get_db_version(db)

        info_updates = dict(
            mod=int(time.time() * 1000),  # Modification time stamp
            usn=-1,  # Signals update needed
        )
        if db_version == 0:
            for key in info_updates:
                assert key in info
            info.update(info_updates)
        elif db_version == 1:
            assert len(info) == 1
            first_key = next(iter(info))
            info[first_key].update(info_updates)
        # fixme: this currently doesn't work. In the new db structure there's
        #   a tags table instead of a field, but it doesn't seem to be
        #   used.
        # if self.__items["notes"] is not None:
        #
        #     missing_tags = list(
        #         set(info["tags"].keys())
        #         - set(self.__items["notes"].list_tags())
        #     )
        #     for tag in missing_tags:
        #         # I'm assuming that this is the usn (update sequence number)
        #         # of the tags
        #         info["tags"][tag] = -1
        return info

    def write(
        self,
        modify=False,
        add=False,
        delete=False,
        backup_folder: PurePath | str | None = None,
        _override_exception=False,
    ):
        """Creates a backup of the database and then writes back the new
        data.

        .. danger::

            The write capabilities of ``AnkiPandas`` have currently been disabled
            because of `#137 <https://github.com/klieret/AnkiPandas/issues/137>`_.
            Help in fixing this issue would be greatly appreciated!

        .. danger::

            The switches ``modify``, ``add`` and ``delete`` will run additional
            cross-checks, but do not rely on them to 100%!

        .. warning::

            It is recommended to run :meth:`summarize_changes` before to check
            whether the changes match your expectation.

        .. note::

            Please make sure to thoroughly check your collection in Anki after
            every write process!

        Args:
            modify: Allow modification of existing items (notes, cards, etc.)
            add: Allow adding of new items (notes, cards, etc.)
            delete: Allow deletion of items (notes, cards, etc.)
            backup_folder: Path to backup folder. If None is given, the backup
                is created in the Anki backup directory (if found).

        Returns:
            None
        """
        if not _override_exception:
            raise NotImplementedError(
                "The write capabilities of AnkiPandas have currently been disabled"
                " because of https://github.com/klieret/AnkiPandas/issues/137/. "
                "Help in fixing this issue would be greatly appreciated!"
            )

        if not modify and not add and not delete:
            log.warning(
                "Please set modify=True, add=True or delete=True, you're"
                " literally not allowing me any modification at all."
            )
            return None

        try:
            prepared = self._prepare_write_data(
                modify=modify, add=add, delete=delete
            )
            log.debug("Now getting & updating info.")
            self._get_and_update_info()
        except Exception as e:
            # fix: message used to read "your database is save!"
            log.critical(
                "Something went wrong preparing the data for writing. "
                "However, no data has been written out, so your "
                "database is safe!"
            )
            raise e
        else:
            log.debug("Successfully prepared data for writing.")

        if prepared == {}:
            log.warning(
                "Nothing seems to have been changed. Will not do anything!"
            )
            return None

        backup_path = ankipandas.paths.backup_db(
            self.path, backup_folder=backup_folder
        )
        log.info("Backup created at %s", backup_path.resolve())
        # fix: added missing space between "working" and "anymore"
        log.warning(
            "Currently AnkiPandas might not be able to tell Anki to"
            " sync its database. "
            "You might have to manually tell Anki to sync everything "
            "to AnkiDroid.\n"
            "Furthermore, if you run into issues with tag searches not "
            "working anymore, please first do Notes > Clear unused notes and "
            "then Tools > Check Database (from the main menu). This should "
            "get them to work (sorry about this issue)."
        )

        # Actually setting values here, after all conversion tasks have been
        # carried out. That way if any of them fails, we don't have a
        # partially written collection.
        log.debug("Now actually writing to database.")
        try:
            for table, values in prepared.items():
                log.debug("Now setting table %s.", table)
                with closing(self.db) as db:
                    raw.set_table(
                        db, values["raw"], table=table, mode=values["mode"]
                    )
                    log.debug("Setting table %s successful.", table)

                    # fix: the index updates must run while this connection
                    # is still open; they used to sit outside the `with`
                    # block and thus operated on a closed connection.
                    if table == "cards":
                        raw.update_card_indices(db)
                    elif table == "notes":
                        raw.update_note_indices(db)

            # log.debug("Now setting info")
            # raw.set_info(self.db, info)
            # log.debug("Setting info successful.")
        except Exception as e:
            log.critical(
                "Error while writing data to database at %s. "
                "This means that your database might have become corrupted. "
                "It's STRONGLY advised that you manually restore the database "
                "by replacing it with the backup from %s and restart"
                " from scratch. "
                "Please also open a bug report at "
                "https://github.com/klieret/AnkiPandas/issues/, as errors "
                "during the actual writing process should never occur!",
                self.path.resolve(),
                backup_path.resolve(),
            )
            raise e
        # This is important, because we have written to the database but still
        # have it opened, which can cause DatabaseErrors.
        log.debug("I will now drop all copies of the tables")
        for key in self.__items:
            self.__items[key] = None
        for key in self.__original_items:
            self.__original_items[key] = None
        log.debug("I will now reload the connection.")
        # NOTE(review): `_db` is never read anywhere in this class (the `db`
        # property opens fresh connections), so this connection is never
        # closed — kept only for backward compatibility; candidate for
        # removal once confirmed unused elsewhere.
        self._db = raw.load_db(self.path)
        log.info(
            "In case you're running this from a Jupyter notebook, make "
            "sure to shutdown the kernel or delete all ankipandas objects"
            " before you open anki to take a look at the result (only one "
            "application can use the database at a time)."
        )
392 |
--------------------------------------------------------------------------------
/ankipandas/conftest.py:
--------------------------------------------------------------------------------
from __future__ import annotations

from ankipandas.util.log import set_debug_log_level

# Enable verbose logging for the entire test session (pytest imports this
# conftest before collecting any tests).
set_debug_log_level()
6 |
--------------------------------------------------------------------------------
/ankipandas/data/anki_fields.csv:
--------------------------------------------------------------------------------
1 | Column,AnkiColumn,Table,Description,Native,Default
2 | cdata,data,cards,Currently unused,TRUE,FALSE
3 | cdeck,,cards,Name of the current deck,FALSE,TRUE
4 | cdue,due,cards,"Due is used differently for different card types: new: note id or random int, due: integer day, relative to the collection's creation time, learning: integer timestamp.",TRUE,TRUE
5 | cfactor,factor,cards,"The new ease factor of the card in permille (parts per thousand). If the ease factor is 2500, the card’s interval will be multiplied by 2.5 the next time you press Good.",TRUE,TRUE
6 | cflags,flags,cards,Currently unused,TRUE,FALSE
7 | cid,cid,revs,ID of the card (this corresponds to a epoch millisecond time stamp from the creating time),TRUE,TRUE
8 | cid,id,cards,ID of the card (this corresponds to a epoch millisecond time stamp from the creating time),TRUE,TRUE
9 | civl,ivl,cards,"The new interval that the card was pushed to after the review. Positive values are in days, negative values are in seconds (for learning cards).",TRUE,TRUE
10 | clapses,lapses,cards,"The number of times the card went from a ""was answered correctly"" to ""was answered incorrectly"" state",TRUE,TRUE
11 | cleft,left,cards,"Of the form ``a*1000+b``, with: ``b`` the number of reps left till graduation and ``a`` the number of reps left today",TRUE,TRUE
12 | cmod,mod,cards,Modification time [epoch seconds],TRUE,TRUE
13 | codeck,,cards,Original deck: only used when the card is currently in filtered deck,FALSE,TRUE
14 | codid,odid,cards,Original did: only used when the card is currently in filtered deck,TRUE,FALSE
15 | codue,odue,cards,Original due: only used when the card is currently in filtered deck,TRUE,TRUE
16 | cord,ord,cards,Identifies which of the card templates the card corresponds to. Valid values are from 0 to num templates -1,TRUE,TRUE
17 | cqueue,queue,cards,"Can take the value ‘sched buried’, ‘user buried’, ‘suspended’, ‘new’, ‘learning’, ‘due’, ‘in learning’ (learning but next rev at least a day after the previous review). In the raw Anki database, these values are encoded as -3=sched buried, -2=user buried, -1=suspended, 0=new, 1=learning, 2=due (as for type), 3=in learning.",TRUE,TRUE
18 | creps,reps,cards,Number of reviews,TRUE,TRUE
19 | ctype,type,cards,"Can take the values ‘learning’, ‘review’, ‘relearn’, ‘cram’ (cards being studied in a filtered deck when they are not due). In the raw Anki database, these values are encoded as 0=learning, 1=review, 2=relearn, and 3=cram.",TRUE,TRUE
20 | cusn,usn,cards,This column (update sequence number) is used to keep track of the sync state of reviews and provides no useful information for analysis. Value of -1 indicates changes that need to be pushed to server. usn < server usn indicates changes that need to be pulled from server.,TRUE,TRUE
21 | did,did,cards,ID of the deck,TRUE,FALSE
22 | mid,mid,notes,Model ID,TRUE,FALSE
23 | ncsum,csum,notes,Field checksum used for duplicate check: Integer representation of first 8 digits of sha1 hash of the first field. See source code for details.,TRUE,FALSE
24 | ndata,data,notes,Currently unused,TRUE,FALSE
25 | nflags,flags,notes,Currently unused,TRUE,FALSE
26 | nflds,flds,notes,Fields of the card as list. In the raw anki database the fields are encoded as a string joined by ‘\x1f’ characters.,TRUE,TRUE
27 | nguid,guid,notes,"Globally Unique Id, almost certainly used for syncing",TRUE,TRUE
28 | nid,id,notes,ID of the note (this corresponds to a epoch millisecond time stamp from the creating time),TRUE,TRUE
29 | nid,nid,cards,ID of the note (this corresponds to a epoch millisecond time stamp from the creating time),TRUE,TRUE
30 | nmod,mod,notes,"Modification timestamp, epoch seconds",TRUE,TRUE
31 | nmodel,,notes,Name of the model,FALSE,TRUE
32 | nsfld,sfld,notes,Content of the sort field.,TRUE,FALSE
33 | ntags,tags,notes,Tags of the note as list. In the raw anki database tags are saved space joined string.,TRUE,TRUE
34 | nusn,usn,notes,This column (update sequence number) is used to keep track of the sync state of reviews and provides no useful information for analysis. Value of -1 indicates changes that need to be pushed to server. usn < server usn indicates changes that need to be pulled from server.,TRUE,TRUE
35 | rease,ease,revs,"Which button you pushed to score your recall: review: 1 (wrong), 2 (hard), 3 (ok), 4 (easy), learn/relearn: 1 (wrong), 2 (ok), 3 (easy)",TRUE,TRUE
36 | rfactor,factor,revs,"The new ease factor of the card in permille (parts per thousand). If the ease factor is 2500, the card’s interval will be multiplied by 2.5 the next time you press Good.",TRUE,TRUE
37 | rid,id,revs,ID of the review (this corresponds to a epoch millisecond time stamp from the creating time),TRUE,TRUE
38 | rivl,ivl,revs,"The new interval that the card was pushed to after the review. Positive values are in days, negative values are in seconds (for learning cards).",TRUE,TRUE
39 | rlastIvl,lastIvl,revs,Last interval,TRUE,TRUE
40 | rtime,time,revs,"How many milliseconds your review took, up to 60000 (60s)",TRUE,TRUE
41 | rtype,type,revs,"Values: ‘learning’, ‘review’, ‘relearn’, ‘cram’ (cards being studied in a filtered deck when they are not due). In the raw Anki database this is 0 for learning cards, 1 for review cards, 2 for relearn cards, and 3 for cram cards. ",TRUE,TRUE
42 | rusn,usn,revs,This column (update sequence number) is used to keep track of the sync state of reviews and provides no useful information for analysis. Value of -1 indicates changes that need to be pushed to server. usn < server usn indicates changes that need to be pulled from server.,TRUE,TRUE
43 |
--------------------------------------------------------------------------------
/ankipandas/paths.py:
--------------------------------------------------------------------------------
1 | """ Convenience functions to find the database and other system locations
2 | without the user having to specify full paths.
3 | """
4 |
5 | from __future__ import annotations
6 |
7 | import collections
8 | import datetime
9 |
10 | # std
11 | import os
12 | import shutil
13 | from functools import lru_cache
14 | from pathlib import Path, PurePath
15 | from typing import DefaultDict
16 |
17 | # ours
18 | from ankipandas.util.log import log
19 |
20 |
21 | @lru_cache(32)
22 | def _find_db(
23 | search_path,
24 | maxdepth=6,
25 | filename="collection.anki2",
26 | break_on_first=False,
27 | user: str | None = None,
28 | ) -> DefaultDict[str, list[Path]]:
29 | """
30 | Like find_database but only for one search path at a time. Also doesn't
31 | raise any error, even if the search path doesn't exist.
32 |
33 | Args:
34 | search_path:
35 | maxdepth: Maximum depth relative to search_path
36 | filename:
37 | break_on_first: Break on first search result
38 | user: Only search for this user
39 |
40 | Returns:
41 | collection.defaultdict({user: [list of results]})
42 | """
43 | search_path = Path(search_path)
44 | if not search_path.exists():
45 | log.debug("_find_db: Search path %r does not exist.", str(search_path))
46 | return collections.defaultdict(list)
47 | if search_path.is_file():
48 | if search_path.name == filename:
49 | return collections.defaultdict(
50 | list, {search_path.parent.name: [search_path]}
51 | )
52 | else:
53 | log.warning(
54 | "_find_db: Search path %r is a file, but filename does not "
55 | "match that of %r.",
56 | str(search_path),
57 | filename,
58 | )
59 | return collections.defaultdict(list)
60 | found: DefaultDict[str, list[Path]] = collections.defaultdict(list)
61 | for root, dirs, files in os.walk(str(search_path)):
62 | if filename in files:
63 | _user = os.path.basename(root)
64 | if user and not _user == user:
65 | continue
66 | found[_user].append(Path(root) / filename)
67 | if break_on_first:
68 | log.debug("_find_db: Breaking after first hit.")
69 | break
70 | depth = len(Path(root).relative_to(search_path).parts)
71 | if maxdepth and depth >= maxdepth:
72 | # log.debug(
73 | # "_find_db: Abort search at %r. "
74 | # "Max depth exceeded.",
75 | # str(root)
76 | # )
77 | del dirs[:]
78 | return found
79 |
80 |
@lru_cache(32)
def find_db(
    search_paths=None,
    maxdepth=8,
    filename="collection.anki2",
    user=None,
    break_on_first=True,
) -> Path:
    """
    Find path to anki2 database.

    Args:
        search_paths: Search path as string or pathlib object or list/iterable
            thereof. If None, some search paths are set by default.
        maxdepth: Maximal search depth.
        filename: Filename of the collection (default: ``collections.anki2``)
        user: Username to which the collection belongs. If None, search for
            databases of any user.
        break_on_first: Stop searching once a database is found. This is
            obviously faster, but you will not get any errors if there are
            multiple databases matching your criteria.

    Raises:
        If none or more than one result is found: :class:`ValueError`

    Returns:
        Path to the anki2 database
    """
    # fix: wrap single paths BEFORE iterating below. Previously this check
    # ran after the expanduser/resolve comprehension, so a plain string was
    # iterated character by character and a PurePath raised TypeError.
    if isinstance(search_paths, (str, PurePath)):
        search_paths = [search_paths]
    if not search_paths:
        log.info(
            "Searching for database. This might take some time. "
            "You can speed this up by specifying a search path or "
            "directly entering the path to your database."
        )
        search_paths = [
            "~/.local/share/Anki2/",
            "~/Documents/Anki2",
            Path(os.getenv("APPDATA", "~") + "/Anki2/"),
            Path.home(),
        ]
    search_paths = [Path(sp).expanduser().resolve() for sp in search_paths]
    # Deduplicate while preserving order (e.g. the same default path given
    # with and without a trailing slash resolves to one location).
    search_paths = list(dict.fromkeys(search_paths))
    if break_on_first:
        log.warning(
            "The search will stop at the first hit, so please verify that "
            "the result is correct (for example in case there might be more "
            "than one Anki installation)"
        )
    found: dict[str, list[Path]] = {}
    for search_path in search_paths:
        found = {
            **found,
            **_find_db(
                search_path,
                maxdepth=maxdepth,
                filename=filename,
                user=user,
                break_on_first=break_on_first,
            ),
        }
        if break_on_first:
            if user is not None:
                if user in found:
                    break
            else:
                if found:
                    break

    if user:
        # We were searching for a specific user
        if user not in found:
            raise ValueError(
                f"Could not find database belonging to user {user}"
            )
        else:
            results_user = found[user]
    else:
        if len(found) >= 2:
            raise ValueError(
                "Found databases for more than one user: {}. Please specify "
                "the user.".format(", ".join(found))
            )
        elif not found:
            raise ValueError(
                "No database found. You might increase the search depth or "
                "specify search paths to find more."
            )
        else:
            # No user specified but we found only one
            results_user = found.popitem()[1]

    if len(results_user) >= 2:
        raise ValueError(
            "Found more than one database belonging to user {} at {}".format(
                user, ", ".join(map(str, results_user))
            )
        )

    assert len(results_user) == 1
    final_result = results_user[0]
    log.debug("Database found at %r.", final_result)
    return final_result
185 |
186 |
@lru_cache(32)
def db_path_input(
    path: str | PurePath | None = None, user: str | None = None
) -> Path:
    """Helper function to interpret user input of path to database.

    1. If no path is given, we search through some default locations
    2. If path points to a file: Take that file
    3. If path points to a directory: Search in that directory

    Args:
        path: Path to database or search path or None
        user: User name of anki collection or None

    Returns:
        Path to anki database as :class:`Path` object

    Raises:
        If path does not exist: :class:`FileNotFoundError`
        In various other cases: :class:`ValueError`
    """
    if path is not None:
        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(
                f"db_path_input: File '{str(path)}' does not exist."
            )
        if path.is_file():
            # Case 2: an explicit database file was given
            log.debug(
                "db_path_input: Database explicitly set to %r.", str(path)
            )
            result = path
        else:
            # Case 3: a directory was given, search inside it
            result = find_db(
                search_paths=(path,), user=user, break_on_first=False
            )
            log.info("Database found at %r.", str(result))
    else:
        # Case 1: nothing given, search the default locations
        result = find_db(user=user)
    if not result:
        raise ValueError("Database could not be found.")
    return result
230 |
231 |
def db_backup_file_name() -> str:
    """Return a timestamp-based file name for a new backup file."""
    # Microsecond resolution keeps names unique even for rapid backups.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H.%M.%S.%f")
    return f"backup-ankipandas-{timestamp}.anki2"
237 |
238 |
def get_anki_backup_folder(path: str | PurePath, nexist="raise") -> Path:
    """Return path to Anki backup folder.

    Anki keeps its automatic backups in a ``backups`` directory next to the
    database file, so this simply resolves that sibling folder.

    Args:
        path: Path to Anki database as :class:`Path`
        nexist: What to do if backup folder doesn't seem to exist: ``raise`` or
            ``ignore``.

    Returns:
        Path to Anki backup folder as :class:`Path`.

    Raises:
        FileNotFoundError: If ``path`` does not point to an existing file.
        ValueError: If ``nexist`` is not a supported option, or if
            ``nexist == "raise"`` and the backup folder does not exist.
    """
    # Validate 'nexist' up front: previously a typo like "rais" was silently
    # treated like "ignore", disabling the safety check.
    if nexist not in ("raise", "ignore"):
        raise ValueError(
            f"Unknown value for nexist: '{nexist}' "
            "(expected 'raise' or 'ignore')."
        )
    path = Path(path)
    if not path.is_file():
        raise FileNotFoundError(f"Database path {path} seems to be invalid.")
    backup_folder = path.parent / "backups"
    if nexist == "raise" and not backup_folder.is_dir():
        raise ValueError(
            f"Anki backup folder corresponding to database at {path} doesn't seem"
            " to exist. Perhaps you can specify a custom backup "
            "folder?"
        )
    return backup_folder
261 |
262 |
def backup_db(
    db_path: str | PurePath,
    backup_folder: str | PurePath | None = None,
) -> Path:
    """
    Back up database file.

    Args:
        db_path: Path to database
        backup_folder: Path to backup folder. If None is given, the backup is
            created in the Anki backup directory.

    Returns:
        Path to newly created backup file as :class:`Path`.

    Raises:
        FileNotFoundError: If the database file does not exist.
    """
    db_path = Path(db_path)
    # Check the source first: previously the backup directory was created
    # (or the Anki backup folder resolved) even when the database itself
    # was missing, leaving a spurious empty directory behind.
    if not db_path.is_file():
        raise FileNotFoundError("Database does not seem to exist.")
    if backup_folder:
        backup_folder = Path(backup_folder)
        if not backup_folder.is_dir():
            log.debug("Creating backup directory %s.", backup_folder)
            backup_folder.mkdir(parents=True)
    else:
        backup_folder = get_anki_backup_folder(db_path, nexist="raise")
    backup_path = backup_folder / db_backup_file_name()
    # copy2 also preserves file metadata (timestamps etc.).
    shutil.copy2(str(db_path), str(backup_path))
    return backup_path
291 |
--------------------------------------------------------------------------------
/ankipandas/raw.py:
--------------------------------------------------------------------------------
1 | """ These function implement the more direct interactions with the Anki
2 | database and provide basic functionality that is then used to implement the
3 | functionality in :class:`~ankipandas.collection.Collection`,
4 | :class:`ankipandas.ankidf.AnkiDataFrame` etc.
5 |
6 | .. warning::
7 |
    Please only use these functions if you know what you are doing, as they
    come with fewer consistency checks than the functionality implemented in
10 | :class:`~ankipandas.collection.Collection` and
11 | :class:`ankipandas.ankidf.AnkiDataFrame`.
12 | Also note that the functions here are considered to be internal, i.e. might
13 | change without prior notice.
14 | """
15 |
16 | from __future__ import annotations
17 |
18 | import json
19 | import pathlib
20 | import sqlite3
21 |
22 | # std
23 | from collections import defaultdict
24 | from functools import lru_cache
25 |
26 | import numpy as np
27 |
28 | # 3rd
29 | import pandas as pd
30 |
31 | from ankipandas._columns import anki_columns, tables_ours2anki
32 |
33 | # ours
34 | from ankipandas.util.log import log
35 | from ankipandas.util.misc import defaultdict2dict, nested_dict
36 |
# Maximum number of cached results for the ``lru_cache``-decorated getters
# in this module.
CACHE_SIZE = 32
38 |
39 |
40 | # Open/Close db
41 | # ==============================================================================
42 |
43 |
def load_db(path: str | pathlib.PurePath) -> sqlite3.Connection:
    """
    Open a connection to the Anki database at ``path``.

    Args:
        path: String or :class:`pathlib.PurePath`.

    Returns:
        :class:`sqlite3.Connection`

    Raises:
        FileNotFoundError: If ``path`` is not an existing file.
    """
    db_file = pathlib.Path(path)
    if not db_file.is_file():
        raise FileNotFoundError(f"Not a file/file not found: {db_file}")
    # Resolve to an absolute path before handing it to sqlite.
    return sqlite3.connect(str(db_file.resolve()))
58 |
59 |
def close_db(db: sqlite3.Connection) -> None:
    """Close the database.

    Thin wrapper around :meth:`sqlite3.Connection.close`, kept so callers of
    this module never touch the connection object directly.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        None
    """
    db.close()
70 |
71 |
72 | # Basic getters
73 | # ==============================================================================
74 |
75 |
def get_table(db: sqlite3.Connection, table: str) -> pd.DataFrame:
    """Read one of the raw Anki tables into a dataframe.

    Args:
        db: Database (:class:`sqlite3.Connection`)
        table: ``cards``, ``notes`` or ``revs``

    Returns:
        :class:`pandas.DataFrame`
    """
    # Translate our table name to the name used inside the Anki database.
    anki_table = tables_ours2anki[table]
    return pd.read_sql_query(f"SELECT * FROM {anki_table}", db)
89 |
90 |
def get_empty_table(table: str) -> pd.DataFrame:
    """Return a dataframe with the raw columns of ``table`` but no rows.

    Args:
        table: ``cards``, ``notes`` or ``revs``

    Returns:
        :class: `pandas.DataFrame`
    """
    columns = anki_columns[table]
    return pd.DataFrame(columns=columns)
101 |
102 |
103 | def _interpret_json_val(val):
104 | if isinstance(val, str) and val:
105 | try:
106 | return json.loads(val)
107 | except json.decoder.JSONDecodeError:
108 | return val
109 | # msg = (
110 | # "AN ERROR OCCURRED WHILE TRYING TO LOAD INFORMATION "
111 | # "FROM THE DATABASE. PLEASE COPY THE WHOLE INFORMATION"
112 | # " BELOW AND ABOVE AND OPEN A BUG REPORT ON GITHUB!\n\n"
113 | # )
114 | # msg += "value to be parsed: {}".format(repr(val))
115 | # log.critical(msg)
116 | # raise
117 | else:
118 | return val
119 |
120 |
def read_info(db: sqlite3.Connection, table_name: str) -> dict:
    """Get a table from the database and return nested dictionary mapping of
    it.

    Args:
        db: Database (:class:`sqlite3.Connection`)
        table_name: Name of the table to read (e.g. ``col``, ``decks``).

    Returns:
        Nested dictionary; JSON-encoded cells are decoded via
        :func:`_interpret_json_val`.
    """
    version = get_db_version(db)
    _df = pd.read_sql_query(f"SELECT * FROM {table_name} ", db)
    if version == 0:
        # Old schema: the table is a single row whose cells hold JSON blobs.
        assert len(_df) == 1, _df
        ret = nested_dict()
        for col in _df.columns:
            ret[col] = _interpret_json_val(_df[col][0])
    elif version == 1:
        # New schema: one row per entry; the first one or two columns act as
        # key(s) of the returned nested dictionary.
        ret = nested_dict()
        cols = _df.columns
        # todo: this is a hack, but oh well:
        # If the first column alone is not unique, assume the first TWO
        # columns together form a composite key (this is e.g. how
        # get_mid2fields reads the 'fields' table: finfo[mid][ord]).
        index_cols = 1
        if len(_df[cols[0]].unique()) != len(_df):
            index_cols = 2
        for row in _df.iterrows():
            # row is a (index, Series) tuple; keep only the values.
            row = row[1].to_list()
            if index_cols == 1:
                for icol in range(1, len(cols)):
                    ret[row[0]][cols[icol]] = _interpret_json_val(row[icol])
            elif index_cols == 2:
                for icol in range(2, len(cols)):
                    ret[row[0]][row[1]][cols[icol]] = _interpret_json_val(
                        row[icol]
                    )
            else:
                raise ValueError
    else:
        raise NotImplementedError
    # Convert the nested defaultdicts into plain dicts before returning.
    return defaultdict2dict(ret)
161 |
162 |
@lru_cache(CACHE_SIZE)
def get_info(db: sqlite3.Connection) -> dict:
    """
    Get all other information from the database, e.g. information about models,
    decks etc.

    Results are cached per connection via ``lru_cache``.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Nested dictionary.
    """
    # All of this metadata lives in the 'col' table.
    return read_info(db, "col")
176 |
177 |
@lru_cache(CACHE_SIZE)
def get_db_version(db: sqlite3.Connection) -> int:
    """Get "version" of database structure.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        0: all info (also for decks and models) was in the 'col' table;
        1: separate 'notetypes' and 'decks' tables
    """
    query = (
        "SELECT name FROM sqlite_master "
        "WHERE type ='table' AND name NOT LIKE 'sqlite_%';"
    )
    rows = db.cursor().execute(query).fetchall()
    table_names = {row[0] for row in rows}
    # The 'decks' table only exists in the newer database layout.
    return int("decks" in table_names)
202 |
203 |
204 | # Basic Setters
205 | # ==============================================================================
206 |
207 |
def _consolidate_tables(
    df: pd.DataFrame, df_old: pd.DataFrame, mode: str, id_column="id"
):
    """Combine a new table with the existing one according to ``mode``.

    Args:
        df: New table
        df_old: Table as currently found in the database
        mode: 'update' (modify existing entries only), 'append' (add new
            entries only) or 'replace' (take the new table wholesale)
        id_column: Column with ID

    Returns:
        The consolidated :class:`pandas.DataFrame`.

    Raises:
        ValueError: If the two tables have different columns or ``mode``
            is unknown.
    """
    if not list(df.columns) == list(df_old.columns):
        raise ValueError(
            "Columns do not match: Old: {}, New: {}".format(
                ", ".join(df_old.columns), ", ".join(df.columns)
            )
        )

    old_indices = set(df_old[id_column])
    new_indices = set(df[id_column])

    # Get indices
    # -----------

    if mode == "update":
        # Only rows whose ID already exists may be touched.
        indices = set(old_indices)
    elif mode == "append":
        # Only rows with IDs not yet present may be added.
        indices = set(new_indices) - set(old_indices)
        if not indices:
            log.warning(
                "Was told to append to table, but there do not seem to be any"
                " new entries. "
            )
    elif mode == "replace":
        indices = set(new_indices)
    else:
        raise ValueError(f"Unknown mode '{mode}'.")

    # Restrict the incoming frame to the rows selected above.
    df = df[df[id_column].isin(indices)]

    # Apply
    # -----

    if mode == "update":
        df_new = df_old.copy()
        # NOTE(review): DataFrame.update aligns on the frames' *index*, not
        # on id_column — this assumes df and df_old are indexed consistently
        # (e.g. both by their IDs); verify at call sites.
        df_new.update(df)
    elif mode == "append":
        # verify_integrity raises if concatenation duplicates an index.
        df_new = pd.concat([df_old, df], verify_integrity=True)
    elif mode == "replace":
        df_new = df.copy()
    else:
        raise ValueError(f"Unknown mode '{mode}'.")

    return df_new
254 |
255 |
# fixme: update mode also can delete things if we do not have all rows
def set_table(
    db: sqlite3.Connection,
    df: pd.DataFrame,
    table: str,
    mode: str,
    id_column="id",
) -> None:
    """
    Write table back to database.

    Args:
        db: Database (:class:`sqlite3.Connection`)
        df: The :class:`pandas.DataFrame` to write
        table: Table to write to: 'notes', 'cards', 'revs'
        mode: 'update': Update all existing entries, 'append': Only append new
            entries, but do not modify, 'replace': Append, modify and delete
        id_column: Column with ID
    Returns:
        None
    """
    # Merge the incoming frame with what is currently in the database ...
    consolidated = _consolidate_tables(
        df=df, df_old=get_table(db, table), mode=mode, id_column=id_column
    )
    # ... and overwrite the whole Anki table with the result.
    consolidated.to_sql(
        tables_ours2anki[table], db, if_exists="replace", index=False
    )
282 |
283 |
class NumpyJSONEncoder(json.JSONEncoder):
    """JSON Encoder that support numpy datatypes by converting them to
    built in datatypes."""

    def default(self, obj):
        """Convert numpy scalars/arrays to plain Python equivalents."""
        conversions = (
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda arr: arr.tolist()),
        )
        for numpy_type, convert in conversions:
            if isinstance(obj, numpy_type):
                return convert(obj)
        # Unknown type: let the base class raise TypeError.
        return super().default(obj)
297 |
298 |
def set_info(db: sqlite3.Connection, info: dict) -> None:
    """Write back extra info to database

    Args:
        db: Database (:class:`sqlite3.Connection`)
        info: Output of :func:`get_info`

    Returns:
        None
    """

    def _serialize(value):
        # Scalars and strings are stored as-is; everything else becomes a
        # JSON string (numpy types handled by NumpyJSONEncoder).
        if isinstance(value, (float, np.floating, int, np.integer, str)):
            return value
        return json.dumps(value, cls=NumpyJSONEncoder)

    serialized = {key: _serialize(value) for key, value in info.items()}
    # The 'col' table consists of exactly one row.
    pd.DataFrame(serialized, index=[0]).to_sql(
        "col", db, if_exists="replace", index=False
    )
325 |
326 |
def update_note_indices(db: sqlite3.Connection) -> None:
    """Create the search indices for the 'notes' table if they are missing.
    This does not modify any information in the table itself.
    See https://github.com/klieret/AnkiPandas/issues/124 for more information.
    """
    statements = (
        "CREATE INDEX IF NOT EXISTS idx_notes_mid ON notes (mid)",
        "CREATE INDEX IF NOT EXISTS ix_notes_csum on notes (csum)",
        "CREATE INDEX IF NOT EXISTS ix_notes_usn on notes (usn)",
    )
    cursor = db.cursor()
    for statement in statements:
        cursor.execute(statement)
336 |
337 |
def update_card_indices(db: sqlite3.Connection) -> None:
    """Create the search indices for the 'cards' table if they are missing.
    This does not modify any information in the table itself.
    See https://github.com/klieret/AnkiPandas/issues/124 for more information.
    """
    statements = (
        "CREATE INDEX IF NOT EXISTS idx_cards_odid ON cards (odid) WHERE odid != 0",
        "CREATE INDEX IF NOT EXISTS ix_cards_nid on cards (nid)",
        "CREATE INDEX IF NOT EXISTS ix_cards_sched on cards (did, queue, due)",
        "CREATE INDEX IF NOT EXISTS ix_cards_usn on cards (usn)",
    )
    cursor = db.cursor()
    for statement in statements:
        cursor.execute(statement)
352 |
353 |
354 | # Trivially derived getters
355 | # ==============================================================================
356 |
357 | # todo: Using decorators here causes the function signatures to be messed up
358 | # with sphinx but oh well.
359 |
360 |
@lru_cache(CACHE_SIZE)
def get_ids(db: sqlite3.Connection, table: str) -> list[int]:
    """Get list of IDs, e.g. note IDs etc.

    Args:
        db: Database (:class:`sqlite3.Connection`)
        table: 'revs', 'cards', 'notes'

    Returns:
        List of the IDs (``int``) in the table's ``id`` column.
    """
    return get_table(db, table)["id"].astype(int).tolist()
373 |
374 |
@lru_cache(CACHE_SIZE)
def get_deck_info(db: sqlite3.Connection) -> dict:
    """Get information about decks.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Nested dictionary
    """
    version = get_db_version(db)
    if version == 0:
        # Legacy layout: deck info is a JSON blob inside the 'col' table.
        deck_info = get_info(db)["decks"]
    elif version == 1:
        # Newer layout: decks live in their own table.
        deck_info = read_info(db, "decks")
    else:
        raise NotImplementedError
    return deck_info if deck_info else {}
396 |
397 |
@lru_cache(CACHE_SIZE)
def get_did2deck(db: sqlite3.Connection) -> dict[int, str]:
    """Mapping of deck IDs (did) to deck names.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Dictionary mapping
    """
    deck_info = get_deck_info(db)
    # defaultdict(str): unknown deck IDs map to the empty string.
    mapping = defaultdict(str)
    for did, info in deck_info.items():
        mapping[int(did)] = info["name"]
    return mapping
411 |
412 |
@lru_cache(CACHE_SIZE)
def get_deck2did(db: sqlite3.Connection) -> dict[str, int]:
    """Mapping of deck names to deck IDs

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Dictionary mapping of deck name to deck ID
    """
    deck_info = get_deck_info(db)
    # defaultdict(int): unknown deck names map to 0.
    deck2did = defaultdict(int)
    for did, info in deck_info.items():
        deck2did[info["name"]] = int(did)
    return deck2did
426 |
427 |
@lru_cache(CACHE_SIZE)
def get_model_info(db: sqlite3.Connection) -> dict:
    """Get information about models.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Nested dictionary, keyed by model ID (``int``).
    """
    version = get_db_version(db)
    if version == 0:
        # Legacy layout: model info is a JSON blob inside the 'col' table.
        model_info = get_info(db)["models"]
    elif version == 1:
        # Newer layout: models live in the 'notetypes' table.
        model_info = read_info(db, "notetypes")
    else:
        raise NotImplementedError
    if not model_info:
        return {}
    return {int(mid): value for mid, value in model_info.items()}
448 |
449 |
@lru_cache(CACHE_SIZE)
def get_mid2model(db: sqlite3.Connection) -> dict[int, str]:
    """Mapping of model IDs (mid) to model names.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Dictionary mapping
    """
    model_info = get_model_info(db)
    # defaultdict(str): unknown model IDs map to the empty string.
    mapping = defaultdict(str)
    for mid, info in model_info.items():
        mapping[int(mid)] = info["name"]
    return mapping
463 |
464 |
@lru_cache(CACHE_SIZE)
def get_model2mid(db: sqlite3.Connection) -> dict[str, int]:
    """Mapping of model name to model ID (mid)

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Dictionary mapping
    """
    model_info = get_model_info(db)
    # defaultdict(int): unknown model names map to 0.
    model2mid = defaultdict(int)
    for mid, info in model_info.items():
        model2mid[info["name"]] = int(mid)
    return model2mid
478 |
479 |
@lru_cache(CACHE_SIZE)
def get_mid2sortfield(db: sqlite3.Connection) -> dict[int, int]:
    """Mapping of model ID to index of sort field."""
    minfo = get_model_info(db)
    if get_db_version(db) == 0:
        sortfields = {mid: info["sortf"] for mid, info in minfo.items()}
        return defaultdict(int, sortfields)
    # fixme: Don't know how to retrieve sort field yet
    return dict.fromkeys(minfo, 0)
491 |
492 |
@lru_cache(CACHE_SIZE)
def get_mid2fields(db: sqlite3.Connection) -> dict[int, list[str]]:
    """Get mapping of model ID to field names.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Dictionary mapping of model ID (mid) to list of field names.
    """
    version = get_db_version(db)
    if version == 0:
        # Field definitions are part of the model info ('flds' entries).
        minfo = get_model_info(db)
        return {
            int(mid): [fld["name"] for fld in minfo[mid]["flds"]]
            for mid in minfo
        }
    elif version == 1:
        # Fields live in their own table, keyed by (mid, ordinal).
        # Loop variable renamed from 'ord', which shadowed the builtin.
        finfo = read_info(db, "fields")
        return {
            int(mid): [finfo[mid][ordinal]["name"] for ordinal in finfo[mid]]
            for mid in finfo
        }
    else:
        raise NotImplementedError
518 |
519 |
@lru_cache(CACHE_SIZE)
def get_mid2templateords(db: sqlite3.Connection) -> dict[int, list[int]]:
    """Get mapping of model ID to available template ordinals.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Dictionary mapping of model ID to list of template ordinals.
    """
    version = get_db_version(db)
    if version == 0:
        # Template info is part of the model info ('tmpls' entries).
        minfo = get_model_info(db)
        return {
            mid: [template["ord"] for template in info["tmpls"]]
            for mid, info in minfo.items()
        }
    elif version == 1:
        # Templates live in their own table, keyed by (mid, ordinal).
        tinfo = read_info(db, "templates")
        return {int(mid): [int(x) for x in tinfo[mid]] for mid in tinfo}
    else:
        raise NotImplementedError
538 |
539 |
@lru_cache(CACHE_SIZE)
def get_cid2nid(db: sqlite3.Connection) -> dict[int, int]:
    """Mapping card ID to note ID.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Dictionary
    """
    cards = get_table(db, "cards")
    # defaultdict(int): unknown card IDs map to 0.
    mapping = defaultdict(int)
    for cid, nid in zip(cards["id"], cards["nid"]):
        mapping[cid] = nid
    return mapping
553 |
554 |
@lru_cache(CACHE_SIZE)
def get_cid2did(db: sqlite3.Connection) -> dict[int, int]:
    """Mapping card ID to deck ID.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Dictionary
    """
    cards = get_table(db, "cards")
    # defaultdict(int): unknown card IDs map to 0.
    mapping = defaultdict(int)
    for cid, did in zip(cards["id"], cards["did"]):
        mapping[cid] = did
    return mapping
568 |
569 |
@lru_cache(CACHE_SIZE)
def get_nid2mid(db: sqlite3.Connection) -> dict[int, int]:
    """Mapping note ID to model ID.

    Args:
        db: Database (:class:`sqlite3.Connection`)

    Returns:
        Dictionary
    """
    notes = get_table(db, "notes")
    # defaultdict(int): unknown note IDs map to 0.
    mapping = defaultdict(int)
    for nid, mid in zip(notes["id"], notes["mid"]):
        mapping[nid] = mid
    return mapping
583 |
--------------------------------------------------------------------------------
/ankipandas/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/ankipandas/test/__init__.py
--------------------------------------------------------------------------------
/ankipandas/test/data/few_basic_cards/collection.anki2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/ankipandas/test/data/few_basic_cards/collection.anki2
--------------------------------------------------------------------------------
/ankipandas/test/data/few_basic_cards/collection_v1.anki2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/ankipandas/test/data/few_basic_cards/collection_v1.anki2
--------------------------------------------------------------------------------
/ankipandas/test/test_ankidf.py:
--------------------------------------------------------------------------------
1 | """ Most of the functionality of the AnkiDataFrames is already tested in
test_core, because that saves us from writing a lot of duplicate code.
3 |
4 | Everything else is tested here.
5 | """
6 |
7 | from __future__ import annotations
8 |
9 | import copy
10 |
11 | # std
12 | import pathlib
13 | import unittest
14 |
15 | # 3rd
16 | import numpy as np
17 |
18 | import ankipandas._columns as _columns
19 | import ankipandas.raw as raw
20 | from ankipandas._columns import our_columns
21 |
22 | # ours
23 | from ankipandas.ankidf import AnkiDataFrame as AnkiDF
24 | from ankipandas.collection import Collection
25 | from ankipandas.util.log import set_debug_log_level
26 |
27 |
28 | class TestAnkiDF(unittest.TestCase):
29 | db_path = (
30 | pathlib.Path(__file__).parent
31 | / "data"
32 | / "few_basic_cards"
33 | / "collection.anki2"
34 | )
35 |
36 | def setUp(self):
37 | set_debug_log_level()
38 | self.db = raw.load_db(self.db_path)
39 |
40 | self.col = Collection(self.db_path)
41 |
42 | # Do not modify this one!
43 | self.notes = self.col.notes
44 | self.cards = self.col.cards
45 | self.revs = self.col.revs
46 | self.table2adf = {
47 | "notes": self.notes,
48 | "cards": self.cards,
49 | "revs": self.revs,
50 | }
51 | self.adfs = [self.notes, self.cards, self.revs]
52 |
53 | self.empty_notes = self.col.empty_notes()
54 | self.empty_cards = self.col.empty_cards()
55 | self.empty_revs = self.col.empty_revs()
56 |
57 | def nnotes(self):
58 | return self.notes.copy()
59 |
60 | def ncards(self):
61 | return self.cards.copy()
62 |
63 | def nrevs(self):
64 | return self.revs.copy()
65 |
66 | def nenotes(self):
67 | return self.empty_notes.copy()
68 |
69 | def necards(self):
70 | return self.empty_cards.copy()
71 |
72 | def nerevs(self):
73 | return self.empty_revs.copy()
74 |
75 | def ntable(self, table):
76 | if table == "notes":
77 | return self.nnotes()
78 | elif table == "cards":
79 | return self.ncards()
80 | elif table == "revs":
81 | return self.nrevs()
82 | else:
83 | raise ValueError("Unknown table")
84 |
85 | def ntable2adf(self):
86 | return {
87 | "notes": self.nnotes(),
88 | "cards": self.ncards(),
89 | "revs": self.nrevs(),
90 | }
91 |
92 | def nadfs(self):
93 | return [self.nnotes(), self.ncards(), self.nrevs()]
94 |
95 | # Test constructors
96 | # ==========================================================================
97 |
98 | def test_empty(self):
99 | eadfs = {
100 | "notes": self.col.empty_notes(),
101 | "cards": self.col.empty_cards(),
102 | "revs": self.col.empty_revs(),
103 | }
104 | for table, eadf in eadfs.items():
105 | self.assertEqual(len(eadf), 0)
106 | adf = self.table2adf[table]
107 | self.assertListEqual(sorted(adf.columns), sorted(eadf.columns))
108 |
109 | def test_tags(self):
110 | self.assertListEqual(
111 | list(self.notes.query("index==1555579337683")["ntags"].values)[0],
112 | ["other_test_tag"],
113 | )
114 | self.assertListEqual(
115 | list(self.notes.query("index==1555579352896")["ntags"].values)[0],
116 | ["some_test_tag"],
117 | )
118 |
119 | def test_cards(self):
120 | cards = self.cards
121 | self.assertGreater(len(cards), 11)
122 | self.assertEqual(sorted(cards.columns), sorted(our_columns["cards"]))
123 |
124 | def test_notes(self):
125 | notes = self.notes
126 | self.assertGreater(len(notes), 6)
127 | self.assertEqual(sorted(notes.columns), sorted(our_columns["notes"]))
128 |
129 | def test_get_revs(self):
130 | revs = self.revs
131 | self.assertEqual(sorted(revs.columns), sorted(our_columns["revs"]))
132 | self.assertGreater(len(revs), 4)
133 |
134 | # Test merging
135 | # ==========================================================================
136 |
137 | def test_merge_notes_cards(self):
138 | merged = self.ncards().merge_notes()
139 | self.assertListEqual(
140 | sorted(merged.columns),
141 | sorted(set(our_columns["cards"]) | set(our_columns["notes"])),
142 | )
143 |
144 | def test_merge_notes_revs(self):
145 | merged = self.nrevs().merge_notes()
146 | self.assertListEqual(
147 | sorted(merged.columns),
148 | sorted(
149 | # Note: 'nid' is not a notes column.
150 | set(our_columns["revs"])
151 | | set(our_columns["notes"])
152 | | {"nid"}
153 | ),
154 | )
155 |
156 | def test_merge_notes_raises(self):
157 | with self.assertRaises(ValueError):
158 | self.nnotes().merge_notes()
159 |
160 | def test_merge_cards(self):
161 | merged = self.nrevs().merge_cards()
162 | self.assertListEqual(
163 | sorted(merged.columns),
164 | sorted(set(our_columns["revs"]) | set(our_columns["cards"])),
165 | )
166 |
167 | def test_merge_cards_raises(self):
168 | with self.assertRaises(ValueError):
169 | self.ncards().merge_cards()
170 | with self.assertRaises(ValueError):
171 | self.nnotes().merge_cards()
172 |
173 | # Test properties
174 | # ==========================================================================
175 |
176 | # Trivial cases
177 | # --------------------------------------------------------------------------
178 |
179 | def test_nids_notes(self):
180 | self.assertListEqual(list(self.notes.index), list(self.notes.nid))
181 | self.assertListEqual(
182 | list(self.notes.index),
183 | list(raw.get_table(self.db, "notes")["id"].unique()),
184 | )
185 | self.assertEqual(len(self.notes.nid.unique()), len(self.notes.nid))
186 |
187 | def test_cids_cards(self):
188 | self.assertListEqual(list(self.cards.index), list(self.cards.cid))
189 | self.assertListEqual(
190 | list(self.cards.index),
191 | list(raw.get_table(self.db, "cards")["id"].unique()),
192 | )
193 | self.assertEqual(len(self.cards.cid.unique()), len(self.cards.cid))
194 |
195 | def test_rids_revs(self):
196 | self.assertListEqual(list(self.revs.index), list(self.revs.rid))
197 | self.assertListEqual(
198 | list(self.revs.index),
199 | list(raw.get_table(self.db, "revs")["id"].unique()),
200 | )
201 | self.assertEqual(len(self.revs.rid.unique()), len(self.revs.rid))
202 |
203 | # Slightly more elaborate cases
204 | # --------------------------------------------------------------------------
205 |
206 | def test_nids_cards(self):
207 | self.assertListEqual(
208 | sorted(list(self.cards.nid.unique())),
209 | sorted(list(self.notes.nid.unique())),
210 | )
211 |
212 | def test_nids_revs(self):
213 | self.assertTrue(
214 | set(self.notes.nid.unique()).issuperset(set(self.revs.nid.unique()))
215 | )
216 |
217 | def test_nids_nexist(self):
218 | nexist = AnkiDF()
219 | with self.assertRaises(ValueError):
220 | # noinspection PyStatementEffect
221 | nexist.nid
222 |
223 | def test_cids_revs(self):
224 | self.assertTrue(
225 | set(self.revs.cid.unique()).issubset(set(self.cards.cid.unique()))
226 | )
227 |
228 | def test_cids_notes(self):
229 | with self.assertRaises(ValueError):
230 | # noinspection PyStatementEffect
231 | self.notes.cid
232 |
233 | # --------------------------------------------------------------------------
234 |
235 | def test_mids(self):
236 | mids2s = {
237 | "notes": set(self.notes.mid),
238 | "cards": set(self.cards.mid),
239 | "revs": set(self.revs.mid),
240 | }
241 | mids = set(raw.get_mid2model(self.db))
242 | for table, mids2 in mids2s.items():
243 | with self.subTest(table=table):
244 | self.assertTrue(mids2.issubset(mids))
245 |
246 | def test_dids(self):
247 | did2s = {"cards": set(self.cards.did), "revs": set(self.revs.did)}
248 | dids = set(raw.get_did2deck(self.db))
249 | for table, dids2 in did2s.items():
250 | with self.subTest(table=table):
251 | self.assertTrue(dids2.issubset(dids))
252 |
253 | # ==========================================================================
254 |
255 | def test_fields_as_columns(self):
256 | notes = self.nnotes()
257 | notes = notes.fields_as_columns()
258 | cols = our_columns["notes"].copy()
259 | cols.remove("nflds")
260 | prefix = notes.fields_as_columns_prefix
261 | new_cols = [prefix + item for item in ["Front", "Back"]]
262 | self.assertEqual(sorted(notes.columns), sorted(cols + new_cols))
263 |
264 | def test_fields_as_columns_x2(self):
265 | notes = self.nnotes()
266 | notes = notes.fields_as_columns()
267 | notes2 = notes.fields_as_columns()
268 | self.assertTrue(notes.equals(notes2))
269 |
270 | def test_fields_as_list(self):
271 | # Add fields as column, remove original 'flds' column, then
272 | # add it back from the field columns and see if things still check
273 | # out
274 | notes = self.nnotes()
275 | flds = copy.copy(notes["nflds"].values)
276 | notes = notes.fields_as_columns().fields_as_list()
277 | self.assertEqual(list(flds), list(notes["nflds"].values))
278 | self.assertListEqual(
279 | sorted(notes.columns), sorted(our_columns["notes"])
280 | )
281 |
282 | def test_fields_as_list_x2(self):
283 | notes = self.nnotes()
284 | notes2 = notes.fields_as_list()
285 | self.assertTrue(notes.equals(notes2))
286 |
287 | # Convenience
288 | # ==========================================================================
289 |
290 | def test_list_decks(self):
291 | decks = self.cards.list_decks()
292 | self.assertTrue(set(decks).issuperset({"Testing", "EnglishGerman"}))
293 |
294 | def test_list_models(self):
295 | models = self.notes.list_models()
296 | self.assertTrue(
297 | set(models).issuperset({"Basic", "Basic (and reversed card)"})
298 | )
299 |
300 | # Properties
301 | # ==========================================================================
302 |
303 | def test_prop_nid(self):
304 | notes, cards, revs = self.nadfs()
305 | with self.assertRaises(ValueError):
306 | notes.nid = ""
307 | cards.nid = "a"
308 | revs.nid = "a"
309 | # noinspection PyUnresolvedReferences
310 | self.assertEqual(cards.nid.unique().tolist(), ["a"])
311 | # noinspection PyUnresolvedReferences
312 | self.assertEqual(revs.nid.unique().tolist(), ["a"])
313 |
314 | def test_prop_cid(self):
315 | notes, cards, revs = self.nadfs()
316 | with self.assertRaises(ValueError):
317 | cards.cid = ""
318 | with self.assertRaises(ValueError):
319 | notes.cid = ""
320 | revs.cid = "a"
321 | self.assertEqual(revs["cid"].unique().tolist(), ["a"])
322 |
323 | def test_prop_rid(self):
324 | notes, cards, revs = self.nadfs()
325 | with self.assertRaises(ValueError):
326 | revs.rid = ""
327 | with self.assertRaises(ValueError):
328 | cards.rid = ""
329 | with self.assertRaises(ValueError):
330 | notes.rid = ""
331 |
332 | # Tags
333 | # ==========================================================================
334 |
335 | def test_list_tags(self):
336 | tags = self.notes.list_tags()
337 | self.assertTrue(set(tags).issuperset(["adjective", "noun"]))
338 |
339 | def test_remove_tags(self):
340 | notes = self.nnotes()
341 | notes = notes.remove_tag(None)
342 | self.assertEqual(list({tuple(x) for x in notes["ntags"]}), [()])
343 |
344 | def test_add_tags(self):
345 | notes = self.nnotes().remove_tag(None).add_tag("1145")
346 | self.assertListEqual(
347 | list({tuple(x) for x in notes["ntags"]}), [("1145",)]
348 | )
349 | notes.add_tag("abc", inplace=True)
350 | self.assertListEqual(
351 | list({tuple(x) for x in notes["ntags"]}), [("1145", "abc")]
352 | )
353 | notes.add_tag(["abc", "def"], inplace=True)
354 | self.assertListEqual(
355 | list({tuple(x) for x in notes["ntags"]}), [("1145", "abc", "def")]
356 | )
357 | notes.add_tag([], inplace=True)
358 | self.assertListEqual(
359 | list({tuple(x) for x in notes["ntags"]}), [("1145", "abc", "def")]
360 | )
361 |
362 | def test_has_tag(self):
363 | notes = self.nnotes().remove_tag(None).add_tag("1145")
364 | self.assertListEqual(list(notes.has_tag("1145").unique()), [True])
365 | self.assertListEqual(list(notes.has_tag("asdf").unique()), [False])
366 | self.assertListEqual(list(notes.has_tag().unique()), [True])
367 | self.assertListEqual(
368 | list(notes.has_tag(["asdf", "1145"]).unique()), [True]
369 | )
370 |
371 | def test_has_tag_natural(self):
372 | notes = self.notes
373 | self.assertListEqual(
374 | sorted(list(notes.has_tag(["some_test_tag"]).unique())),
375 | [False, True],
376 | )
377 |
378 | def test_has_tags(self):
379 | notes = self.nnotes().remove_tag(None).add_tag("1145")
380 | self.assertListEqual(list(notes.has_tags("1145").unique()), [True])
381 | self.assertListEqual(list(notes.has_tags("asdf").unique()), [False])
382 | self.assertListEqual(list(notes.has_tags().unique()), [True])
383 | self.assertListEqual(
384 | list(notes.has_tags(["asdf", "1145"]).unique()), [False]
385 | )
386 | notes = notes.add_tag("asdf")
387 | self.assertListEqual(
388 | list(notes.has_tags(["asdf", "1145"]).unique()), [True]
389 | )
390 |
391 | def test_remove_tag(self):
392 | notes = self.nnotes().add_tag(["1145", "asdf"])
393 | notes.remove_tag("1145", inplace=True)
394 | self.assertListEqual(list(notes.has_tag(["1145"]).unique()), [False])
395 | self.assertListEqual(list(notes.has_tag(["asdf"]).unique()), [True])
396 |
397 | # Changes
398 | # ==========================================================================
399 |
400 | def test_show_modification_unchanged(self):
401 | for table in ["cards", "revs", "notes"]:
402 | with self.subTest(table=table):
403 | adf = self.table2adf[table]
404 | self.assertEqual(np.sum(~adf.was_modified()), len(adf))
405 | self.assertEqual(np.sum(~adf.was_added()), len(adf))
406 | self.assertEqual(len(adf.was_deleted()), 0)
407 | self.assertEqual(np.sum(~adf.was_modified(adf)), len(adf))
408 | self.assertEqual(np.sum(~adf.was_added(adf)), len(adf))
409 | self.assertEqual(len(adf.was_deleted(adf)), 0)
410 |
411 | def test_show_modification_empty(self):
412 | for table in ["cards", "revs", "notes", "notes_cols"]:
413 | with self.subTest(table=table):
414 | if table == "notes_cols":
415 | adf = self.ntable("notes")
416 | else:
417 | adf = self.ntable(table)
418 | adf_old = adf.copy()
419 | if table == "notes_cols":
420 | adf.fields_as_columns(inplace=True)
421 | adf["new_col"] = "blargh"
422 | n = len(adf)
423 | adf = adf.drop(adf.index)
424 | self.assertEqual(np.sum(~adf.was_modified()), 0)
425 | self.assertEqual(np.sum(~adf.was_added()), 0)
426 | self.assertEqual(len(adf.was_deleted()), n)
427 | self.assertEqual(np.sum(~adf.was_modified(adf_old)), 0)
428 | self.assertEqual(np.sum(~adf.was_added(adf_old)), 0)
429 | self.assertEqual(len(adf.was_deleted(adf_old)), n)
430 |
431 | def test_show_modification_all_modified(self):
432 | for table in ["cards", "revs", "notes", "notes_cols"]:
433 | with self.subTest(table=table):
434 | if table == "notes_cols":
435 | adf = self.ntable("notes")
436 | else:
437 | adf = self.ntable(table)
438 | adf_old = adf.copy()
439 | if table == "notes_cols":
440 | adf.fields_as_columns(inplace=True)
441 | adf[adf.columns[2]] = "changed!"
442 | self.assertEqual(np.sum(~adf.was_modified()), 0)
443 | self.assertEqual(np.sum(adf.was_added()), 0)
444 | self.assertEqual(len(adf.was_deleted()), 0)
445 | # ----
446 | self.assertEqual(len(adf.modified_columns(only=True)), len(adf))
447 | self.assertEqual(
448 | len(adf.modified_columns(only=False)), len(adf)
449 | )
450 | self.assertEqual(
451 | list(adf.modified_columns().loc[adf.index[0]]).index(True),
452 | 2,
453 | )
454 | # ----
455 | self.assertEqual(np.sum(~adf.was_modified(adf_old)), 0)
456 | self.assertEqual(np.sum(adf.was_added(adf_old)), 0)
457 | self.assertEqual(len(adf.was_deleted(adf_old)), 0)
458 | # ----
459 | self.assertEqual(
460 | len(adf.modified_columns(only=True, other=adf_old)),
461 | len(adf),
462 | )
463 | self.assertEqual(
464 | len(adf.modified_columns(only=False, other=adf_old)),
465 | len(adf),
466 | )
467 | self.assertEqual(
468 | list(
469 | adf.modified_columns(other=adf_old).loc[adf.index[0]]
470 | ).index(True),
471 | 2,
472 | )
473 |
474 | def test_show_modification_some_modified(self):
475 | for table in ["cards", "revs", "notes", "notes_cols"]:
476 | with self.subTest(table=table):
477 | if table == "notes_cols":
478 | adf = self.ntable("notes")
479 | else:
480 | adf = self.ntable(table)
481 | adf_old = adf.copy()
482 | if table == "notes_cols":
483 | adf.fields_as_columns(inplace=True)
484 | adf.loc[adf.index[0], [adf.columns[2]]] = "changed!"
485 | self.assertEqual(np.sum(adf.was_modified()), 1)
486 | self.assertEqual(adf.was_modified().tolist()[0], True)
487 | self.assertEqual(np.sum(adf.was_added()), 0)
488 | self.assertEqual(len(adf.was_deleted()), 0)
489 | # ----
490 | self.assertEqual(len(adf.modified_columns(only=True)), 1)
491 | self.assertEqual(
492 | len(adf.modified_columns(only=False)), len(adf)
493 | )
494 | self.assertEqual(
495 | list(adf.modified_columns().loc[adf.index[0]]).index(True),
496 | 2,
497 | )
498 | # ----
499 | self.assertEqual(np.sum(adf.was_modified(adf_old)), 1)
500 | self.assertEqual(adf.was_modified(adf_old).tolist()[0], True)
501 | self.assertEqual(np.sum(adf.was_added(adf_old)), 0)
502 | self.assertEqual(len(adf.was_deleted(adf_old)), 0)
503 | # ----
504 | self.assertEqual(
505 | len(adf.modified_columns(only=True, other=adf_old)), 1
506 | )
507 | self.assertEqual(
508 | len(adf.modified_columns(only=False, other=adf_old)),
509 | len(adf),
510 | )
511 | self.assertEqual(
512 | list(
513 | adf.modified_columns(other=adf_old).loc[adf.index[0]]
514 | ).index(True),
515 | 2,
516 | )
517 |
518 | # Formats
519 | # ==========================================================================
520 |
521 | def test_reformat_trivial(self):
522 | for table in ["notes", "revs", "cards"]:
523 | with self.subTest(table=table):
524 | adf = self.ntable(table)
525 | adf2 = adf.normalize()
526 | self.assertTrue(adf.equals(adf2))
527 |
528 | def test_convert_raw_load_raw(self):
529 | for table in ["notes", "revs", "cards"]:
530 | with self.subTest(table=table):
531 | adf = self.ntable(table).raw()
532 | df = raw.get_table(self.db, table)
533 | if table == "notes":
534 | df["tags"] = df["tags"].str.strip()
535 | self.assertTrue(adf.equals(df))
536 |
537 | def test_raw_normalize(self):
538 | for table in ["notes", "revs", "cards"]:
539 | with self.subTest(table=table):
540 | adf = self.ntable(table)
541 | adf2 = adf.raw().normalize()
542 | self.assertTrue(adf.equals(adf2))
543 |
544 | # Update modification stamps
545 | # ==========================================================================
546 |
547 | def test_set_usn(self):
548 | for table in ["notes", "revs", "cards"]:
549 | with self.subTest(table=table):
550 | adf = self.ntable(table)
551 | print(adf.columns)
552 | adf[table[0] + "usn"] = 999
553 | adf_old = adf.copy()
554 | adf.loc[adf.index[0], adf_old.columns[0]] = "definitely changed"
555 | adf._set_usn()
556 | self.assertEqual(
557 | adf.loc[
558 | adf.index[0], _columns.columns_anki2ours[table]["usn"]
559 | ],
560 | -1,
561 | )
562 |
563 | def test_set_mod(self):
564 | for table in ["notes", "cards"]:
565 | with self.subTest(table=table):
566 | adf = self.ntable(table)
567 | adf_old = adf.copy()
568 | adf.loc[adf.index[0], adf.columns[0]] = "definitely changed"
569 | adf._set_mod()
570 | val1 = adf.loc[
571 | adf.index[0], _columns.columns_anki2ours[table]["mod"]
572 | ]
573 | val_rest_1 = adf.loc[
574 | adf.index[1:], _columns.columns_anki2ours[table]["mod"]
575 | ]
576 | val2 = adf_old.loc[
577 | adf.index[0], _columns.columns_anki2ours[table]["mod"]
578 | ]
579 | val_rest_2 = adf.loc[
580 | adf.index[1:], _columns.columns_anki2ours[table]["mod"]
581 | ]
582 | self.assertFalse(val1 == val2)
583 | self.assertListEqual(list(val_rest_1), list(val_rest_2))
584 |
585 | # New
586 | # ==========================================================================
587 |
588 | # Add cards
589 | # --------------------------------------------------------------------------
590 |
591 | @staticmethod
592 | def _cards_dict(card):
593 | return dict(
594 | nid=card["nid"],
595 | cdeck=card["cdeck"],
596 | cord=card["cord"],
597 | cmod=card["cmod"],
598 | cusn=card["cusn"],
599 | cqueue=card["cqueue"],
600 | ctype=card["ctype"],
601 | civl=card["civl"],
602 | cfactor=card["cfactor"],
603 | clapses=card["clapses"],
604 | cleft=card["cleft"],
605 | cdue=card["cdue"],
606 | )
607 |
608 | def _test_new_card_default_values(self, cards, **kwargs):
609 | self.assertEqual(cards["cusn"].unique().tolist(), [-1])
610 | self.assertEqual(cards["cqueue"].unique().tolist(), ["new"])
611 | self.assertEqual(cards["ctype"].unique().tolist(), ["learning"])
612 | self.assertEqual(cards["civl"].unique().tolist(), [0])
613 | self.assertEqual(cards["cfactor"].unique().tolist(), [0])
614 | self.assertEqual(cards["creps"].unique().tolist(), [0])
615 | self.assertEqual(cards["cleft"].unique().tolist(), [0])
616 | for key, value in kwargs.items():
617 | self.assertEqual(cards[key].unique().tolist(), [value])
618 |
619 | def test_new_cards_default_values(self):
620 | empty = self.necards()
621 |
622 | nid1 = 1555579352896
623 | nid2 = 1557223191575
624 | nids = [nid1, nid2]
625 | deck = list(raw.get_did2deck(self.db).values())[0]
626 |
627 | kwargs = dict(cdeck=deck)
628 |
629 | with self.subTest(type="default values single note"):
630 | self._test_new_card_default_values(
631 | empty.add_card(nid1, deck), **kwargs
632 | )
633 | with self.subTest(type="default values single card"):
634 | self._test_new_card_default_values(
635 | empty.add_card(nid1, deck, cord=0), **kwargs, cord=0
636 | )
637 | with self.subTest(type="default values several notes"):
638 | self._test_new_card_default_values(
639 | empty.add_cards(nids, deck), **kwargs
640 | )
641 | with self.subTest(type="default values several notes one cord"):
642 | self._test_new_card_default_values(
643 | empty.add_cards(nids, deck, cord=0), **kwargs, cord=0
644 | )
645 |
646 | def test_new_cards_raises_missing_nid(self):
647 | empty = self.necards()
648 | nids = [1555579352896, -15, -16]
649 | with self.assertRaises(ValueError) as context:
650 | empty.add_cards(nids, "Default")
651 | self.assertTrue("-15" in str(context.exception))
652 | self.assertTrue("-16" in str(context.exception))
653 | self.assertFalse("1555579352896" in str(context.exception))
654 |
655 | def test_new_cards_raises_inconsistent_model(self):
656 | empty = self.necards()
657 | nids = [1555579352896, 1555579337683]
658 | with self.assertRaises(ValueError) as context:
659 | empty.add_cards(nids, "Default")
660 | self.assertTrue("for notes of the same model" in str(context.exception))
661 |
662 | def test_new_cards_raises_missing_deck(self):
663 | empty = self.necards()
664 | nids = [1555579352896]
665 | deck = "not existing for sure"
666 | with self.assertRaises(ValueError) as context:
667 | empty.add_cards(nids, deck)
668 | self.assertTrue(deck in str(context.exception))
669 |
670 | def test_new_cards_raises_due_default_not_new(self):
671 | empty = self.necards()
672 | nids = [1555579352896]
673 | deck = list(raw.get_did2deck(self.db).values())[0]
674 | with self.assertRaises(ValueError) as context:
675 | empty.add_cards(nids, deck, cqueue="learning")
676 | self.assertTrue("Due date can only be set" in str(context.exception))
677 |
678 | def test_new_card_fully_specified(self):
679 | empty = self.necards()
680 | empty_combined = self.necards()
681 |
682 | # Careful: Need notes of the same model!
683 | nid1 = 1555579352896
684 | nid2 = 1557223191575
685 | deck1 = list(raw.get_did2deck(self.db).values())[0]
686 | deck2 = list(raw.get_did2deck(self.db).values())[1]
687 |
688 | init_dict1 = dict(
689 | nid=nid1,
690 | cdeck=deck1,
691 | cord=0,
692 | cmod=123,
693 | cusn=5,
694 | cqueue="learning",
695 | ctype="relearn",
696 | civl=5,
697 | cfactor=17,
698 | clapses=89,
699 | cleft=15,
700 | cdue=178,
701 | )
702 | init_dict2 = dict(
703 | nid=nid2,
704 | cdeck=deck2,
705 | cord=0,
706 | cmod=1123,
707 | cusn=15,
708 | cqueue="due",
709 | ctype="review",
710 | civl=15,
711 | cfactor=117,
712 | clapses=189,
713 | cleft=115,
714 | cdue=1178,
715 | )
716 | init_dict_combined = dict(
717 | nid=[nid1, nid2],
718 | cdeck=[deck1, deck2],
719 | cord=0,
720 | cmod=[123, 1123],
721 | cusn=[5, 15],
722 | cqueue=["learning", "due"],
723 | ctype=["relearn", "review"],
724 | civl=[5, 15],
725 | cfactor=[17, 117],
726 | clapses=[89, 189],
727 | cleft=[15, 115],
728 | cdue=[178, 1178],
729 | )
730 |
731 | cid1 = empty.add_card(**init_dict1, inplace=True)[0]
732 | card1 = empty.loc[cid1]
733 | cid2 = empty.add_card(**init_dict2, inplace=True)[0]
734 | card2 = empty.loc[cid2]
735 |
736 | cid1, cid2 = empty_combined.add_cards(
737 | **init_dict_combined, inplace=True
738 | )
739 | card1c = empty_combined.loc[cid1]
740 | card2c = empty_combined.loc[cid2]
741 |
742 | self.assertDictEqual(init_dict1, self._cards_dict(card1))
743 | self.assertDictEqual(init_dict2, self._cards_dict(card2))
744 | self.assertDictEqual(init_dict1, self._cards_dict(card1c))
745 | self.assertDictEqual(init_dict2, self._cards_dict(card2c))
746 |
747 | self.assertEqual(len(empty), 2)
748 | self.assertEqual(len(empty_combined), 2)
749 |
750 | def test_new_cards_vs_new_card(self):
751 | # Also done in test_new_card_fully_specified
752 |
753 | empty = self.necards()
754 | empty2 = self.necards()
755 |
756 | nid = list(raw.get_nid2mid(self.db))[0]
757 | deck = list(raw.get_did2deck(self.db).values())[0]
758 |
759 | init_dict2 = dict(
760 | nid=[nid],
761 | cdeck=deck,
762 | cord=0,
763 | cmod=123,
764 | cusn=5,
765 | cqueue="learning",
766 | ctype="relearn",
767 | civl=5,
768 | cfactor=17,
769 | clapses=89,
770 | cleft=15,
771 | cdue=178,
772 | )
773 | init_dict1 = copy.deepcopy(init_dict2)
774 | init_dict1["nid"] = nid
775 |
776 | cids = empty2.add_cards(**init_dict2, inplace=True)
777 | card2 = empty2.loc[cids[0]]
778 |
779 | cid = empty.add_card(**init_dict1, inplace=True)[0]
780 | card1 = empty.loc[cid]
781 |
782 | self.assertDictEqual(self._cards_dict(card2), self._cards_dict(card1))
783 |
784 | # Add notes
785 | # --------------------------------------------------------------------------
786 |
787 | def test_new_notes_raises_inconsistent(self):
788 | with self.assertRaises(ValueError):
789 | self.nnotes().add_notes("Basic", [["1", "2"]], ntags=[["1"], ["2"]])
790 | with self.assertRaises(ValueError):
791 | self.nnotes().add_notes("Basic", [["1", "2"]], nid=[123, 124])
792 | with self.assertRaises(ValueError):
793 | self.nnotes().add_notes("Basic", [["1", "2"]], nguid=[123, 124])
794 |
795 | def test_new_notes_raises_nid_clash(self):
796 | with self.assertRaises(ValueError):
797 | self.nnotes().add_note("Basic", ["11", "12"], nid=10).add_note(
798 | "Basic", ["21", "22"], nid=10
799 | )
800 | with self.assertRaises(ValueError):
801 | self.nnotes().add_notes(
802 | "Basic", [["11", "12"], ["22", "22"]], nid=[10, 10]
803 | )
804 |
805 | def test_new_notes_raises_nguid_clash(self):
806 | with self.assertRaises(ValueError):
807 | self.nnotes().add_notes(
808 | "Basic", [["11", "12"], ["21", "22"]], nguid=[10, 10]
809 | )
810 | with self.assertRaises(ValueError):
811 | self.nnotes().add_note("Basic", ["11", "12"], nguid=10).add_note(
812 | "Basic", ["21", "22"], nguid=10
813 | )
814 |
815 | def test_new_notes_fields_as_columns(self):
816 | empty = self.nenotes()
817 | empty.add_notes(
818 | "Basic",
819 | [["field1", "field2"], ["field21", "field22"]],
820 | ntags=[["tag1", "tag2"], ["tag21", "tag22"]],
821 | nguid=["cryptic", "cryptic2"],
822 | nmod=[124, 1235],
823 | nusn=[42, 17],
824 | nid=[123, 125],
825 | inplace=True,
826 | )
827 |
828 | empty2 = self.nenotes().fields_as_columns()
829 | empty2.add_notes(
830 | "Basic",
831 | [["field1", "field2"], ["field21", "field22"]],
832 | ntags=[["tag1", "tag2"], ["tag21", "tag22"]],
833 | nguid=["cryptic", "cryptic2"],
834 | nmod=[124, 1235],
835 | nusn=[42, 17],
836 | nid=[123, 125],
837 | inplace=True,
838 | )
839 |
840 | self.assertDictEqual(
841 | empty.fields_as_columns().to_dict(), empty2.to_dict()
842 | )
843 |
844 | @staticmethod
845 | def _notes_dict(notes):
846 | return {
847 | "nmodel": notes["nmodel"],
848 | "nflds": notes["nflds"],
849 | "ntags": notes["ntags"],
850 | "nguid": notes["nguid"],
851 | "nmod": notes["nmod"],
852 | "nusn": notes["nusn"],
853 | }
854 |
855 | def test_new_note_empty_fully_specified(self):
856 | empty = self.nenotes()
857 |
858 | init_dict = dict(
859 | nmodel="Basic",
860 | nflds=["field1", "field2"],
861 | ntags=["tag1", "tag2"],
862 | nguid="cryptic",
863 | nmod=124,
864 | nusn=42,
865 | )
866 | nid = empty.add_note(nid=123, **init_dict, inplace=True)
867 | self.assertEqual(nid, 123)
868 | note = empty.loc[nid]
869 | self.assertDictEqual(init_dict, self._notes_dict(note))
870 | self.assertEqual(len(empty), 1)
871 |
872 | init_dict2 = dict(
873 | nmodel="Basic",
874 | nflds=["field21", "field22"],
875 | ntags=["tag21", "tag22"],
876 | nguid="cryptic2",
877 | nmod=1235,
878 | nusn=17,
879 | )
880 | nid = empty.add_note(nid=125, **init_dict2, inplace=True)
881 | self.assertEqual(nid, 125)
882 | note = empty.loc[125]
883 | self.assertDictEqual(init_dict2, self._notes_dict(note))
884 | self.assertEqual(len(empty), 2)
885 |
886 | empty2 = self.nenotes()
887 | empty2.add_notes(
888 | "Basic",
889 | [["field1", "field2"], ["field21", "field22"]],
890 | ntags=[["tag1", "tag2"], ["tag21", "tag22"]],
891 | nguid=["cryptic", "cryptic2"],
892 | nmod=[124, 1235],
893 | nusn=[42, 17],
894 | nid=[123, 125],
895 | inplace=True,
896 | )
897 | self.assertTrue(empty.equals(empty2))
898 |
899 | def test_new_note_raises_suplicate(self):
900 | empty = self.nenotes()
901 | empty.add_note("Basic", ["f1", "f2"], nid=10, inplace=True)
902 | self.assertEqual(len(empty), 1)
903 | with self.assertRaises(ValueError):
904 | empty.add_note("Basic", ["f3", "f4"], nid=10, inplace=True)
905 |
906 | def test_new_note_default_values(self):
907 | empty = self.nenotes()
908 |
909 | init_dict = dict(nmodel="Basic", nflds=["field1", "field2"])
910 | nid = empty.add_note(nid=123, **init_dict, inplace=True)
911 | self.assertEqual(nid, 123)
912 | note = empty.loc[nid].to_dict()
913 | self.assertEqual(note["nmodel"], init_dict["nmodel"])
914 | self.assertEqual(note["nflds"], init_dict["nflds"])
915 |
916 | def test_new_note_raises(self):
917 | empty = self.nenotes()
918 | with self.assertRaises(ValueError):
919 | empty.add_note("doesntexist", [])
920 | with self.assertRaises(ValueError):
921 | empty.add_note("Basic", ["1", "2", "3"])
922 |
923 | def test_new_notes_equivalent_field_specifications(self):
924 | empty1 = self.nenotes()
925 | empty2 = self.nenotes()
926 | empty3 = self.nenotes()
927 |
928 | empty1.add_notes("Basic", [["11", "12"], ["21", "22"]], inplace=True)
929 | empty2.add_notes(
930 | "Basic",
931 | [{"Front": "11", "Back": "12"}, {"Front": "21", "Back": "22"}],
932 | inplace=True,
933 | )
934 | empty3.add_notes(
935 | "Basic", {"Front": ["11", "21"], "Back": ["12", "22"]}, inplace=True
936 | )
937 | self.assertListEqual(empty1["nflds"].tolist(), empty2["nflds"].tolist())
938 | self.assertListEqual(empty2["nflds"].tolist(), empty3["nflds"].tolist())
939 |
940 | def test_new_notes_equivalent_field_specifications_fields_as_columns(self):
941 | empty1 = self.nenotes().fields_as_columns()
942 | empty2 = self.nenotes().fields_as_columns()
943 | empty3 = self.nenotes().fields_as_columns()
944 |
945 | empty1.add_notes("Basic", [["11", "12"], ["21", "22"]], inplace=True)
946 | empty2.add_notes(
947 | "Basic",
948 | [{"Front": "11", "Back": "12"}, {"Front": "21", "Back": "22"}],
949 | inplace=True,
950 | )
951 | empty3.add_notes(
952 | "Basic", {"Front": ["11", "21"], "Back": ["12", "22"]}, inplace=True
953 | )
954 |
955 | p = empty1.fields_as_columns_prefix
956 |
957 | self.assertListEqual(
958 | empty1[p + "Front"].tolist(), empty2[p + "Front"].tolist()
959 | )
960 | self.assertListEqual(
961 | empty2[p + "Front"].tolist(), empty3[p + "Front"].tolist()
962 | )
963 | self.assertListEqual(
964 | empty1[p + "Back"].tolist(), empty2[p + "Back"].tolist()
965 | )
966 | self.assertListEqual(
967 | empty2[p + "Back"].tolist(), empty3[p + "Back"].tolist()
968 | )
969 |
970 | # Help
971 | # ==========================================================================
972 |
973 | def test_help_col(self):
974 | for table, adf in self.table2adf.items():
975 | with self.subTest(table=table):
976 | cols = list(adf.columns) + [adf.index.name]
977 | for col in cols:
978 | self.assertIsInstance(adf.help_col(col, ret=True), str)
979 |
980 | def test_help_cols_auto(self):
981 | for table, adf in self.table2adf.items():
982 | with self.subTest(table=table):
983 | df = adf.help_cols()
984 | self.assertListEqual(
985 | list(df.columns),
986 | ["AnkiColumn", "Table", "Description", "Native", "Default"],
987 | )
988 | self.assertListEqual(
989 | sorted(adf.columns),
990 | sorted(set(df.index)), # nid, cid appear twice
991 | )
992 |
993 | def test_help(self):
994 | notes = self.notes
995 | hlp = notes.help(ret=True)
996 | self.assertTrue(isinstance(hlp, str))
997 |
998 |
class TestAnkiDFv1(TestAnkiDF):
    """Re-runs the full AnkiDF test suite against a schema-version-1
    collection file instead of the default one."""

    db_path = (
        pathlib.Path(__file__).parent
        / "data"
        / "few_basic_cards"
        / "collection_v1.anki2"
    )
1006 |
1007 |
if __name__ == "__main__":
    # Allow running this test module directly
    unittest.main()
1010 |
--------------------------------------------------------------------------------
/ankipandas/test/test_collection.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import pathlib
5 | import shutil
6 |
7 | # 3rd
8 | import pytest
9 |
10 | # ours
11 | from ankipandas.collection import Collection
12 | from ankipandas.test.util import parameterized_paths
13 |
14 |
def _init_all_tables(col: Collection) -> None:
    """Access all attributes at least once to ensure that they are
    initialized.
    """
    for attribute in ("notes", "cards", "revs"):
        getattr(col, attribute)
22 |
23 |
24 | # Summarize changes
25 | # ==========================================================================
26 |
27 |
@parameterized_paths()
def test_summarize_changes_uninitialized(db_path):
    """Without touching any table there is nothing to summarize."""
    summary = Collection(db_path).summarize_changes(output="dict")
    assert not summary
33 |
34 |
@parameterized_paths()
def test_summarize_changes_no_changes(db_path):
    """Freshly initialized tables report zero changes everywhere."""
    col = Collection(db_path)
    _init_all_tables(col)
    # Also exercise the default (human-readable) output once
    col.summarize_changes()
    summary = col.summarize_changes(output="dict")
    for table in ("cards", "revs", "notes"):
        table_summary = summary[table]
        assert table_summary["n_modified"] == 0
        assert table_summary["n_added"] == 0
        assert table_summary["n_deleted"] == 0
        assert not table_summary["has_changed"]
46 |
47 |
@parameterized_paths()
def test_summarize_notes_changed(db_path):
    """Tagging every note marks every note as modified."""
    col = Collection(db_path)
    col.notes.add_tag("this_will_be_modified", inplace=True)
    notes_summary = col.summarize_changes(output="dict")["notes"]
    assert notes_summary["n_modified"] == notes_summary["n"]
54 |
55 |
56 | # Writing
57 | # ==========================================================================
58 |
59 |
@parameterized_paths()
def test_read_write_identical_trivial(db_path, tmpdir):
    """Writing unchanged tables and reloading yields identical tables."""
    db_path = shutil.copy2(str(db_path), str(tmpdir))
    (pathlib.Path(str(tmpdir)) / "backups").mkdir()
    col = Collection(db_path)
    _init_all_tables(col)
    col.write(modify=True, delete=True, add=True, _override_exception=True)
    reloaded = Collection(db_path)
    for table in ("notes", "cards", "revs"):
        assert getattr(col, table).equals(getattr(reloaded, table))
71 |
72 |
@parameterized_paths()
def test_write_raises_delete(db_path, tmpdir):
    """write(delete=False) refuses when rows were deleted."""
    db_path = shutil.copy2(str(db_path), str(tmpdir))
    (pathlib.Path(str(tmpdir)) / "backups").mkdir()
    col = Collection(db_path)
    _init_all_tables(col)
    # Delete every note
    col.notes.drop(col.notes.index, inplace=True)
    for modify, add in [(False, True), (True, False), (True, True)]:
        with pytest.raises(ValueError, match=".*would be deleted.*"):
            col.write(
                modify=modify, add=add, delete=False, _override_exception=True
            )
88 |
89 |
@parameterized_paths()
def test_write_raises_modified(db_path, tmpdir):
    """write(modify=False) refuses when rows were modified."""
    db_path = shutil.copy2(str(db_path), str(tmpdir))
    (pathlib.Path(str(tmpdir)) / "backups").mkdir()
    col = Collection(db_path)
    _init_all_tables(col)
    # Modify every note by tagging it
    col.notes.add_tag("test", inplace=True)
    for add, delete in [(False, True), (True, False), (True, True)]:
        with pytest.raises(ValueError, match=".*would be modified.*"):
            col.write(
                add=add, delete=delete, modify=False, _override_exception=True
            )
105 |
106 |
@parameterized_paths()
def test_write_raises_added(db_path, tmpdir):
    """write(add=False) must refuse to write when notes were added."""
    db_path = shutil.copy2(str(db_path), str(tmpdir))
    (pathlib.Path(str(tmpdir)) / "backups").mkdir()
    col = Collection(db_path)
    _init_all_tables(col)
    col.notes.add_note("Basic", ["test", "back"], inplace=True)
    cases = [
        dict(modify=False, delete=True),
        dict(modify=True, delete=False),
        dict(modify=True, delete=True),
    ]
    for case in cases:
        # NOTE(review): the expected message is "would be modified" even
        # though this test ADDS a note — possibly a copy-paste from
        # test_write_raises_modified. Confirm against Collection.write's
        # actual error text before changing the pattern.
        with pytest.raises(ValueError, match=".*would be modified.*"):
            col.write(**case, add=False, _override_exception=True)
122 |
123 |
@parameterized_paths()
def test_write_added(db_path, tmpdir):
    """write(add=True) accepts a collection with an added note."""
    db_path = shutil.copy2(str(db_path), str(tmpdir))
    (pathlib.Path(str(tmpdir)) / "backups").mkdir()
    collection = Collection(db_path)
    _init_all_tables(collection)
    collection.notes.add_note("Basic", ["test", "back"], inplace=True)
    # Must not raise
    collection.write(add=True, _override_exception=True)
132 |
--------------------------------------------------------------------------------
/ankipandas/test/test_paths.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import collections
5 | import tempfile
6 | import unittest
7 | from pathlib import Path
8 |
9 | # 3rd
10 | from randomfiletree import iterative_gaussian_tree, sample_random_elements
11 |
12 | # ours
13 | import ankipandas.paths as paths
14 | from ankipandas.util.log import set_debug_log_level
15 | from ankipandas.util.misc import flatten_list_list
16 |
17 |
def touch_file_in_random_folders(basedir, filename: str, n=1) -> list[Path]:
    """Create files in random folders.

    Args:
        basedir: Starting directory
        filename: Filename of the files to create
        n: Number of files to create

    Returns:
        List of files that were created (deduplicated, so it may be
        shorter than ``n`` if the same folder was sampled twice).
    """
    created = set()
    sampled_dirs = sample_random_elements(
        basedir, n_dirs=n, n_files=0, onfail="ignore"
    )[0]
    for directory in sampled_dirs:
        target = Path(directory) / filename
        target.touch()
        created.add(target)
    return list(created)
37 |
38 |
class TestFindDatabase(unittest.TestCase):
    """Tests for locating Anki databases in a directory tree.

    Fix: removed a leftover debug ``print(self.dbs["multiple"])`` from
    ``test_find_database``; everything else is unchanged.
    """

    def setUp(self):
        """Build three random trees: one without any database, one with
        ten planted databases, and one with exactly one ("perfect")."""
        set_debug_log_level()
        self.dirs = {
            "nothing": tempfile.TemporaryDirectory(),
            "multiple": tempfile.TemporaryDirectory(),
            "perfect": tempfile.TemporaryDirectory(),
        }
        for d in self.dirs.values():
            # Random folders/files act as background noise for the search
            iterative_gaussian_tree(
                d.name,
                repeat=5,
                nfolders=3,
                min_folders=1,
                nfiles=2,
                min_files=1,
                maxdepth=4,
            )
        # Ground truth: the collection files planted in each tree
        self.dbs = {
            "nothing": [],
            "multiple": touch_file_in_random_folders(
                self.dirs["multiple"].name, "collection.anki2", 10
            ),
            "perfect": touch_file_in_random_folders(
                self.dirs["perfect"].name, "collection.anki2", 1
            ),
        }
        self.maxDiff = None

    def test_db_path_input_nexist(self):
        # A nonexistent path must raise FileNotFoundError
        with self.assertRaises(FileNotFoundError):
            paths.db_path_input("/x/y/z")

    def test_db_path_input_multiple(self):
        # Ambiguous: several databases below the given directory
        with self.assertRaises(ValueError):
            paths.db_path_input(self.dirs["multiple"].name)

    def test_db_path_input_nothing(self):
        # No database at all below the given directory
        with self.assertRaises(ValueError):
            paths.db_path_input(self.dirs["nothing"].name)

    def test_db_path_input_perfect(self):
        # Exactly one database: it must be returned
        self.assertEqual(
            paths.db_path_input(self.dirs["perfect"].name),
            self.dbs["perfect"][0],
        )

    def test__find_database(self):
        """_find_db must find exactly the planted databases per tree."""
        for d in self.dirs:
            a = sorted(
                map(
                    str,
                    flatten_list_list(
                        paths._find_db(
                            self.dirs[d].name,
                            maxdepth=None,
                            break_on_first=False,
                        ).values()
                    ),
                )
            )
            b = sorted(str(x) for x in self.dbs[d])
            self.assertListEqual(a, b)

    def test__find_database_filename(self):
        """_find_db with a filename returns a folder -> files mapping."""
        # If doesn't exist
        self.assertEqual(
            paths._find_db(
                Path("abc/myfilename.txt"), filename="myfilename.txt"
            ),
            {},
        )
        tmpdir = tempfile.TemporaryDirectory()
        dir_path = Path(tmpdir.name) / "myfolder"
        file_path = dir_path / "myfilename.txt"
        dir_path.mkdir()
        file_path.touch()
        self.assertEqual(
            paths._find_db(file_path, filename="myfilename.txt"),
            collections.defaultdict(list, {"myfolder": [file_path]}),
        )
        tmpdir.cleanup()

    def test_find_database(self):
        """find_db raises on zero/several hits and returns a single hit."""
        with self.assertRaises(ValueError):
            paths.find_db(self.dirs["nothing"].name, break_on_first=False)
        with self.assertRaises(ValueError):
            paths.find_db(self.dirs["multiple"].name, break_on_first=False)
        self.assertEqual(
            str(paths.find_db(self.dirs["perfect"].name, break_on_first=False)),
            str(self.dbs["perfect"][0]),
        )

    def tearDown(self):
        """Remove all temporary directory trees."""
        for d in self.dirs.values():
            d.cleanup()
136 |
137 |
class TestBackup(unittest.TestCase):
    """Tests for locating Anki backup folders and creating DB backups."""

    def setUp(self):
        set_debug_log_level()
        # A collection directory that also has a "backups" subfolder
        self.tmpdir = tempfile.TemporaryDirectory()
        self.tmpdir_path = Path(self.tmpdir.name)
        (self.tmpdir_path / "collection.anki2").touch()
        (self.tmpdir_path / "backups").mkdir()
        # A collection directory WITHOUT a backup folder
        self.tmpdir_only_db = tempfile.TemporaryDirectory()
        self.tmpdir_only_db_path = Path(self.tmpdir_only_db.name)
        (self.tmpdir_only_db_path / "collection.anki2").touch()

    def tearDown(self):
        for tmpdir in (self.tmpdir, self.tmpdir_only_db):
            tmpdir.cleanup()

    def test_get_anki_backup_folder(self):
        backup_folder = paths.get_anki_backup_folder(
            self.tmpdir_path / "collection.anki2"
        )
        self.assertEqual(
            str(backup_folder), str(self.tmpdir_path / "backups")
        )

    def test_get_anki_backup_folder_raise(self):
        # Nonexistent database file
        with self.assertRaises(FileNotFoundError):
            paths.get_anki_backup_folder(self.tmpdir_path / "asdf")
        # Database without a backup folder
        db = self.tmpdir_only_db_path / "collection.anki2"
        with self.assertRaises(ValueError):
            paths.get_anki_backup_folder(db)
        # ... unless the missing folder is explicitly ignored
        paths.get_anki_backup_folder(db, nexist="ignore")

    def test_backup_db_auto(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            db_path = Path(tmpdir) / "collection.anki2"
            db_path.touch()
            backup_folder = db_path.parent / "backups"
            backup_folder.mkdir()
            backup_path = paths.backup_db(db_path)
            self.assertTrue(backup_path.is_file())
            self.assertTrue(backup_path.parent == backup_folder)

    def test_backup_db_custom(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            db_path = Path(tmpdir) / "collection.anki2"
            db_path.touch()
            custom_folder = db_path.parent / "myfolder"
            backup_path = paths.backup_db(
                db_path, backup_folder=custom_folder
            )
            self.assertTrue(backup_path.is_file())
            self.assertTrue(backup_path.parent == custom_folder)
193 |
194 |
if __name__ == "__main__":
    # Allow running this test module directly
    unittest.main()
197 |
--------------------------------------------------------------------------------
/ankipandas/test/test_raw.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import copy
5 | import pathlib
6 | import shutil
7 | import tempfile
8 | import unittest
9 |
10 | # 3rd
11 | import pandas as pd
12 |
13 | # ours
14 | from ankipandas.raw import (
15 | close_db,
16 | get_db_version,
17 | get_deck_info,
18 | get_did2deck,
19 | get_info,
20 | get_mid2fields,
21 | get_mid2model,
22 | get_model_info,
23 | get_table,
24 | load_db,
25 | set_info,
26 | set_table,
27 | )
28 | from ankipandas.util.dataframe import merge_dfs
29 | from ankipandas.util.log import set_debug_log_level
30 |
31 |
class TestRawRead(unittest.TestCase):
    """Read-only access to the raw database for both schema versions."""

    def setUp(self):
        set_debug_log_level()
        self.db_folder = (
            pathlib.Path(__file__).parent / "data" / "few_basic_cards"
        )
        # Map schema version -> open database connection
        self.version2db = {
            0: load_db(self.db_folder / "collection.anki2"),
            1: load_db(self.db_folder / "collection_v1.anki2"),
        }

    def tearDown(self):
        for db in self.version2db.values():
            close_db(db)

    def test_get_db_version(self):
        for version in [0, 1]:
            with self.subTest(version=version):
                # Use unittest assertion for better failure messages
                # (was a bare assert)
                self.assertEqual(
                    get_db_version(self.version2db[version]), version
                )

    def test_get_deck_info(self):
        for version in [0, 1]:
            with self.subTest(version=version):
                info = get_deck_info(self.version2db[version])
                self.assertGreaterEqual(len(info), 2)
                self.assertIsInstance(info, dict)

    def test_get_deck_names(self):
        for version in [0, 1]:
            with self.subTest(version=version):
                names = get_did2deck(self.version2db[version])
                self.assertTrue(
                    set(names.values()).issuperset({"Testing", "EnglishGerman"})
                )

    def test_get_model_info(self):
        for version in [0, 1]:
            with self.subTest(version=version):
                minfo = get_model_info(self.version2db[version])
                self.assertIsInstance(minfo, dict)
                self.assertGreaterEqual(len(minfo), 2)

    def test_get_model_names(self):
        for version in [0, 1]:
            with self.subTest(version=version):
                names = get_mid2model(self.version2db[version])
                self.assertIn("Basic", names.values())
                self.assertIn("Cloze", names.values())
                self.assertEqual(len(names), 5)

    def test_get_field_names(self):
        for version in [0, 1]:
            with self.subTest(version=version):
                _fnames = get_mid2fields(self.version2db[version])
                models = get_mid2model(self.version2db[version])
                # Re-key fields by model name instead of model id
                fnames = {models[mid]: _fnames[mid] for mid in models}
                # (removed leftover debug print() statements)
                self.assertEqual(
                    len(fnames), len(get_mid2model(self.version2db[version]))
                )
                self.assertListEqual(fnames["Basic"], ["Front", "Back"])
95 |
96 |
class TestRawWrite(unittest.TestCase):
    """Round-trip and mutation tests for the raw table read/write API."""

    db_read_path = (
        pathlib.Path(__file__).parent
        / "data"
        / "few_basic_cards"
        / "collection.anki2"
    )

    def setUp(self):
        set_debug_log_level()
        self.db_read = load_db(self.db_read_path)
        self.db_write_dir = tempfile.TemporaryDirectory()
        self.db_write_path = (
            pathlib.Path(self.db_write_dir.name) / "collection.anki2"
        )
        self._reset()

    def _reset(self):
        # Start each scenario from a pristine copy of the read database.
        shutil.copy(str(self.db_read_path), str(self.db_write_path))
        self.db_write = load_db(self.db_write_path)

    def tearDown(self):
        self.db_read.close()
        self.db_write.close()
        self.db_write_dir.cleanup()

    def _check_db_equal(self):
        """Assert that read and write databases hold identical tables."""
        notes = get_table(self.db_read, "notes")
        cards = get_table(self.db_read, "cards")
        revlog = get_table(self.db_read, "revs")
        notes2 = get_table(self.db_write, "notes")
        cards2 = get_table(self.db_write, "cards")
        revlog2 = get_table(self.db_write, "revs")
        # noinspection PyUnresolvedReferences
        self.assertListEqual(notes.values.tolist(), notes2.values.tolist())
        # noinspection PyUnresolvedReferences
        self.assertListEqual(cards.values.tolist(), cards2.values.tolist())
        # noinspection PyUnresolvedReferences
        self.assertListEqual(revlog.values.tolist(), revlog2.values.tolist())

    def test_rw_identical(self):
        """Writing back unchanged tables must leave the db unchanged."""
        notes = get_table(self.db_read, "notes")
        cards = get_table(self.db_read, "cards")
        revlog = get_table(self.db_read, "revs")
        for mode in ["update", "replace", "append"]:
            with self.subTest(mode=mode):
                self._reset()
                set_table(self.db_write, notes, "notes", mode)
                set_table(self.db_write, cards, "cards", mode)
                set_table(self.db_write, revlog, "revs", mode)
                self._check_db_equal()

    def test_update(self):
        """A changed tag is written back (except in append mode)."""
        notes2 = get_table(self.db_read, "notes")
        notes = get_table(self.db_read, "notes")
        for mode in ["update", "replace", "append"]:
            with self.subTest(mode=mode):
                self._reset()
                notes2.loc[notes2["id"] == 1555579337683, "tags"] = (
                    "definitelynew!"
                )
                set_table(self.db_write, notes2, "notes", mode)
                if mode == "append":
                    # Append must not overwrite existing rows
                    self._check_db_equal()
                else:
                    notes2r = get_table(self.db_write, "notes")
                    chtag = notes2r.loc[notes2r["id"] == 1555579337683, "tags"]
                    self.assertListEqual(
                        list(chtag.values.tolist()), ["definitelynew!"]
                    )
                    unchanged = notes.loc[notes["id"] != 1555579337683, :]
                    # Fix: build the mask from notes2r itself (previously it
                    # was built from notes2, silently relying on index
                    # alignment between the two frames).
                    unchanged2 = notes2r.loc[notes2r["id"] != 1555579337683, :]

                    self.assertListEqual(
                        list(unchanged.values.tolist()),
                        list(unchanged2.values.tolist()),
                    )

    def test_update_append_does_not_delete(self):
        """Writing EMPTY tables with update/append must delete nothing."""
        notes = get_table(self.db_read, "notes")
        cards = get_table(self.db_read, "cards")
        revs = get_table(self.db_read, "revs")
        # Fix: DataFrame.drop returns a new frame; previously the results
        # were discarded, so full tables were written and the test was
        # vacuous (compare test_replace_deletes, which assigns correctly).
        notes = notes.drop(notes.index)
        cards = cards.drop(cards.index)
        revs = revs.drop(revs.index)
        for mode in ["update", "append"]:
            with self.subTest(mode=mode):
                self._reset()
                set_table(self.db_write, notes, "notes", mode)
                set_table(self.db_write, cards, "cards", mode)
                set_table(self.db_write, revs, "revs", mode)
                self._check_db_equal()

    def test_replace_deletes(self):
        """Writing empty tables with mode 'replace' empties the db."""
        notes = get_table(self.db_read, "notes")
        cards = get_table(self.db_read, "cards")
        revs = get_table(self.db_read, "revs")
        notes = notes.drop(notes.index)
        cards = cards.drop(cards.index)
        revs = revs.drop(revs.index)
        self._reset()
        set_table(self.db_write, notes, "notes", "replace")
        set_table(self.db_write, cards, "cards", "replace")
        set_table(self.db_write, revs, "revs", "replace")
        notes = get_table(self.db_write, "notes")
        cards = get_table(self.db_write, "cards")
        revs = get_table(self.db_write, "revs")
        self.assertEqual(len(notes), 0)
        self.assertEqual(len(revs), 0)
        self.assertEqual(len(cards), 0)

    def test_set_get_inverse(self):
        """get_info after set_info returns the same info dict."""
        info = get_info(self.db_read)
        set_info(self.db_write, info)
        info2 = get_info(self.db_write)
        self.assertDictEqual(info, info2)
213 |
214 |
class TestRawWriteV1(unittest.TestCase):
    # NOTE(review): this class only overrides the database path and does NOT
    # inherit from TestRawWrite, so no write tests actually run against the
    # v1 schema. Possibly `class TestRawWriteV1(TestRawWrite)` was intended
    # — confirm with the maintainers before changing.
    db_read_path = (
        pathlib.Path(__file__).parent
        / "data"
        / "few_basic_cards"
        / "collection_v1.anki2"
    )
222 |
223 |
class TestMergeDfs(unittest.TestCase):
    """Behavior of :func:`ankipandas.util.dataframe.merge_dfs`."""

    def setUp(self):
        set_debug_log_level()
        # Base frame: repeated ids, plus a column that clashes with df_add
        self.df = pd.DataFrame(
            {"id_df": [1, 2, 3, 1, 1], "clash": ["a", "b", "c", "a", "a"]}
        )
        # Frame to be merged in, with extra columns to keep/drop/ignore
        self.df_add = pd.DataFrame(
            {
                "id_add": [1, 2, 3],
                "value": [4, 5, 6],
                "drop": [7, 8, 9],
                "ignore": [10, 11, 12],
                "clash": [1, 1, 1],
            }
        )

    def test_merge_dfs(self):
        merged = merge_dfs(
            self.df,
            self.df_add,
            id_df="id_df",
            id_add="id_add",
            prepend="_",
            columns=["value", "drop", "clash"],
            drop_columns=["id_add", "drop"],
        )
        expected_columns = ["_clash", "clash", "id_df", "value"]
        self.assertListEqual(sorted(merged.columns), expected_columns)
        self.assertListEqual(sorted(merged["value"]), [4, 4, 4, 5, 6])

    def test_merge_dfs_prepend_all(self):
        merged = merge_dfs(
            self.df,
            self.df_add,
            id_df="id_df",
            id_add="id_add",
            prepend="_",
            prepend_clash_only=False,
        )
        expected_columns = [
            "_clash", "_drop", "_ignore", "_value", "clash", "id_df",
        ]
        self.assertListEqual(sorted(merged.columns), expected_columns)

    def test_merge_dfs_inplace(self):
        target = copy.deepcopy(self.df)
        merge_dfs(
            target, self.df_add, id_df="id_df", id_add="id_add", inplace=True
        )
        expected_columns = [
            "clash_x", "clash_y", "drop", "id_df", "ignore", "value",
        ]
        self.assertListEqual(sorted(target.columns), expected_columns)
        self.assertListEqual(sorted(target["value"]), [4, 4, 4, 5, 6])
278 |
279 |
# Allow running this test module directly (outside of pytest).
if __name__ == "__main__":
    unittest.main()
282 |
--------------------------------------------------------------------------------
/ankipandas/test/test_regression.py:
--------------------------------------------------------------------------------
1 | """ These tests are created from issues that we fixed to avoid that they might
2 | come back later.
3 | """
4 |
5 | from __future__ import annotations
6 |
7 | # ours
8 | from ankipandas.collection import Collection
9 | from ankipandas.test.util import parameterized_paths
10 |
11 |
@parameterized_paths()
def test_inplace_merge_notes(db_path):
    """Regression test for https://github.com/klieret/AnkiPandas/issues/51
    AttributeError: 'NoneType' object has no attribute 'col'
    """
    col = Collection(db_path)
    # Must complete without raising; the issue was an AttributeError here.
    col.cards.merge_notes(inplace=True)
19 |
--------------------------------------------------------------------------------
/ankipandas/test/util.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import pathlib
5 |
6 | # 3rd
7 | import pytest
8 |
# Bundled test collections, one per supported database schema version.
_data_dir = pathlib.Path(__file__).resolve().parent / "data" / "few_basic_cards"
_test_db_paths = [
    _data_dir / name for name in ("collection.anki2", "collection_v1.anki2")
]


def parameterized_paths():
    """Decorator that runs a test once per bundled test database."""
    return pytest.mark.parametrize("db_path", _test_db_paths)
23 |
--------------------------------------------------------------------------------
/ankipandas/util/__init__.py:
--------------------------------------------------------------------------------
1 | """ Various utilities of this package.
2 |
3 | .. warning::
4 |
5 | These utilities are less aimed at end users and might therefore be subject
6 | to change.
7 |
8 | """
9 |
10 | from __future__ import annotations
11 |
12 | import ankipandas.util.checksum
13 | import ankipandas.util.dataframe
14 | import ankipandas.util.log
15 | import ankipandas.util.misc
16 |
--------------------------------------------------------------------------------
/ankipandas/util/checksum.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 | from hashlib import sha1
5 | from html.entities import name2codepoint
6 |
7 | # Implementation directly copied from Anki (anki/anki/utils.py).
8 | # Only a bit of PEP8ing and making things private.
9 |
# Precompiled patterns for stripping markup before checksumming.
# Restored from the upstream Anki source (anki/utils.py): the literals had
# been mangled by HTML-stripping (the angle-bracket parts were cut out).
_reComment = re.compile("(?s)<!--.*?-->")
_reStyle = re.compile("(?si)<style.*?>.*?</style>")
_reScript = re.compile("(?si)<script.*?>.*?</script>")
_reTag = re.compile("(?s)<.*?>")
_reEnts = re.compile(r"&#?\w+;")
_reMedia = re.compile("(?i)<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>")
16 |
17 |
def _strip_html(s):
    """Remove comments, style/script blocks, remaining tags and HTML
    entities from *s*, returning plain text."""
    s = _reComment.sub("", s)
    s = _reStyle.sub("", s)
    s = _reScript.sub("", s)
    s = _reTag.sub("", s)
    # Finally translate entities/character references to their characters
    s = _ents_to_txt(s)
    return s
25 |
26 |
def _strip_html_media(s):
    """Strip HTML but keep media filenames"""
    # Replace <img ...> tags by their src filename before tags are stripped
    s = _reMedia.sub(" \\1 ", s)
    return _strip_html(s)
31 |
32 |
def _ents_to_txt(html):
    """Replace HTML entities and character references in *html* with the
    characters they denote.

    Restored from the upstream Anki source: the ``&#``/``&#x``/``&nbsp;``
    literals had been mangled by HTML-stripping in this copy.
    """
    # entitydefs defines nbsp as \xa0 instead of a standard space, so we
    # replace it first
    html = html.replace("&nbsp;", " ")

    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # numeric character reference, decimal (&#65;) or hex (&#x41;)
            try:
                if text[:3] == "&#x":
                    return chr(int(text[3:-1], 16))
                else:
                    return chr(int(text[2:-1]))
            except ValueError:
                pass
        else:
            # named entity, e.g. &amp;
            try:
                text = chr(name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text  # leave as is

    return _reEnts.sub(fixup, html)
58 |
59 |
def _checksum(data):
    """Return the SHA1 hex digest of *data* (str is UTF-8 encoded first)."""
    payload = data.encode("utf-8") if isinstance(data, str) else data
    return sha1(payload).hexdigest()
64 |
65 |
def field_checksum(data: str) -> int:
    """32 bit unsigned number from first 8 digits of sha1 hash.
    Apply this to the first field to get the field checksum that is used by
    Anki to detect duplicates.

    Args:
        data: string like

    Returns:
        int
    """
    # Strip markup first so that formatting differences do not change the
    # checksum; keep only the first 8 hex digits (= 32 bits) of the hash.
    return int(_checksum(_strip_html_media(data).encode("utf-8"))[:8], 16)
78 |
--------------------------------------------------------------------------------
/ankipandas/util/dataframe.py:
--------------------------------------------------------------------------------
1 | """ DataFrame utilities. """
2 |
3 | # std
4 | from __future__ import annotations
5 |
6 | # 3rd
7 | import pandas as pd
8 |
9 | # ours
10 | from ankipandas.util.log import log
11 |
12 |
def _sync_metadata(df_ret: pd.DataFrame, df_old: pd.DataFrame) -> None:
    """Copy metadata attributes from ``df_old`` onto ``df_ret``.

    If ``df_old`` has a ``_metadata`` attribute (a list of attribute names
    that hold metadata), each listed attribute is copied over to ``df_ret``.

    Args:
        df_ret: Target dataframe that receives the metadata
        df_old: Source dataframe that provides the metadata

    Returns:
        None
    """
    if not hasattr(df_old, "_metadata"):
        return
    for attr in df_old._metadata:
        val = getattr(df_old, attr)
        log.debug("Setting metadata attribute %s to %s", attr, val)
        setattr(df_ret, attr, val)
31 |
32 |
def replace_df_inplace(df: pd.DataFrame, df_new: pd.DataFrame) -> None:
    """Replace dataframe 'in place'.
    If the dataframe has a `_metadata` field, containing a list of attribute
    names that contain metadata, then this is copied from `df` to the new
    dataframe.

    Args:
        df: :class:`pandas.DataFrame` to be replaced
        df_new: :class:`pandas.DataFrame` to replace the previous one

    Returns:
        None
    """
    # Drop all ROWs (not columns). Use len() rather than Index.any():
    # Index.any() tests the truthiness of the index ELEMENTS, so an index
    # like [0] is falsy and the old rows would survive, corrupting the
    # result through index alignment below.
    if len(df.index) > 0:
        df.drop(df.index, inplace=True)
    for col in df_new.columns:
        df[col] = df_new[col]
    # Remove columns that only existed in the old frame
    drop_cols = set(df.columns) - set(df_new.columns)
    if drop_cols:
        df.drop(drop_cols, axis=1, inplace=True)
    _sync_metadata(df_new, df)
55 |
56 |
# todo: this might be made more elegant in the future for sure...
# fixme: This removes items whenever it can't merge!
def merge_dfs(
    df: pd.DataFrame,
    df_add: pd.DataFrame,
    id_df: str,
    inplace: bool = False,
    id_add: str = "id",
    prepend: str = "",
    replace: bool = False,
    prepend_clash_only: bool = True,
    columns: list[str] | None = None,
    drop_columns: list[str] | None = None,
) -> pd.DataFrame | None:
    """
    Merge information from two dataframes.
    If the dataframe has a `_metadata` field, containing a list of attribute
    names that contain metadata, then this is copied from `df` to the new
    dataframe.

    Args:
        df: Original :class:`pandas.DataFrame`
        df_add: :class:`pandas.DataFrame` to be merged with original
            :class:`pandas.DataFrame`
        id_df: Column of original dataframe that contains the id along which
            we merge.
        inplace: If False, return new dataframe, else update old one
        id_add: Column of the new dataframe that contains the id along which
            we merge
        prepend: Prepend a string to the column names from the new dataframe
        replace: Replace columns
        prepend_clash_only: Only prepend string to the column names from the
            new dataframe if there is a name clash.
        columns: Keep only these columns
        drop_columns: Drop these columns

    Returns:
        New merged :class:`pandas.DataFrame` (or None if ``inplace=True``)
    """
    # Careful: Do not drop the id column until later (else we can't merge)
    # Still, we want to remove as much as possible here, because it's probably
    # better performing
    if columns:
        df_add = df_add.drop(
            set(df_add.columns) - (set(columns) | {id_add}), axis=1
        )
    if drop_columns:
        df_add = df_add.drop(set(drop_columns) - {id_add}, axis=1)
    # Careful: Rename columns after dropping unwanted ones
    if prepend_clash_only:
        col_clash = set(df.columns) & set(df_add.columns)
        rename_dict = {col: prepend + col for col in col_clash}
    else:
        rename_dict = {col: prepend + col for col in df_add.columns}
    df_add = df_add.rename(columns=rename_dict)
    # Careful: Might have renamed id_add as well
    if id_add in rename_dict:
        id_add = rename_dict[id_add]

    if replace:
        # Simply remove all potential clashes
        replaced_columns = set(df_add.columns) & set(df.columns)
        df = df.drop(replaced_columns, axis=1)

    # Merge on a column or on the index of each frame, depending on where
    # the respective ID lives.
    merge_kwargs = {}

    if id_add in df_add.columns:
        merge_kwargs["right_on"] = id_add
    elif id_add == df_add.index.name:
        merge_kwargs["right_index"] = True
    else:
        raise ValueError(f"'{id_add}' is neither index nor column.")

    if id_df in df.columns:
        merge_kwargs["left_on"] = id_df
    elif id_df == df.index.name:
        merge_kwargs["left_index"] = True
    else:
        raise ValueError(f"'{id_df}' is neither index nor column.")

    df_merge = df.merge(df_add, **merge_kwargs)

    # Now remove id_add if it was to be removed
    # Careful: 'in' doesn't work with None
    if (columns and id_add not in columns) or (
        drop_columns and id_add in drop_columns
    ):
        df_merge.drop(id_add, axis=1, inplace=True)

    # Make sure we don't have two ID columns
    new_id_add_col = id_add
    if id_add in rename_dict:
        new_id_add_col = rename_dict[id_add]
    if new_id_add_col in df_merge.columns and id_df != new_id_add_col:
        df_merge.drop(new_id_add_col, axis=1, inplace=True)

    # Carry over metadata attributes from the original frame
    _sync_metadata(df_merge, df)

    if inplace:
        replace_df_inplace(df, df_merge)
        return None  # mypy
    else:
        return df_merge
160 |
--------------------------------------------------------------------------------
/ankipandas/util/guid.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import random
4 | import string
5 |
6 | # Directly copied from anki utils!
7 |
8 |
# used in ankiweb
def _base62(num, extra=""):
    """Encode non-negative *num* with letters, digits and *extra* chars.

    Most significant digit first; 0 encodes to the empty string.
    """
    alphabet = string.ascii_letters + string.digits + extra
    base = len(alphabet)
    digits = []
    while num:
        num, rem = divmod(num, base)
        digits.append(alphabet[rem])
    return "".join(reversed(digits))
18 |
19 |
# Punctuation added on top of the 62 alphanumeric characters for base 91
_base91_extra_chars = "!#$%&()*+,-./:;<=>?@[]^_`{|}~"


def _base91(num):
    """Encode *num* in base 91 (alphanumerics plus punctuation)."""
    # all printable characters minus quotes, backslash and separators
    return _base62(num, _base91_extra_chars)
26 |
27 |
def _guid64():
    """Return a base91-encoded 64bit random number."""
    # NOTE: uses random (not secrets), matching the upstream Anki code this
    # was copied from; the guid is an identifier, not a security token.
    return _base91(random.randint(0, 2**64 - 1))
31 |
32 |
def guid():
    """Return globally unique ID (a base91-encoded random 64 bit number)."""
    return _guid64()
36 |
--------------------------------------------------------------------------------
/ankipandas/util/log.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import logging
5 |
6 | import colorlog
7 |
8 | LOG_DEFAULT_LEVEL = logging.INFO
9 |
10 |
def get_logger():
    """Set up (or fetch) the global 'AnkiPandas' logger."""
    logger = colorlog.getLogger("AnkiPandas")

    if logger.handlers:
        # Handlers already attached: colorlog.getLogger handed us an
        # existing, fully configured logger, so there is nothing to do.
        return logger

    logger.setLevel(LOG_DEFAULT_LEVEL)

    handler = colorlog.StreamHandler()
    handler.setFormatter(
        colorlog.ColoredFormatter(
            "%(log_color)s%(levelname)s: %(message)s",
            log_colors={
                "DEBUG": "cyan",
                "INFO": "green",
                "WARNING": "yellow",
                "ERROR": "red",
                "CRITICAL": "red",
            },
        )
    )
    # Filtering is controlled by the overall logger level, not the handler
    handler.setLevel(logging.DEBUG)
    logger.addHandler(handler)

    return logger
41 |
42 |
def set_log_level(level: str | int) -> None:
    """Set global log level.

    Args:
        level: Either an int
            (https://docs.python.org/3/library/logging.html#levels)
            or one of the keywords, 'critical' (only the most terrifying of log
            messages), 'error', 'warning', 'info',
            'debug' (all log messages)

    Returns:
        None
    """
    if isinstance(level, str):
        # Translate keyword to the numeric logging level, e.g. 'info' -> 20
        level = getattr(logging, level.upper())
    get_logger().setLevel(level)
60 |
61 |
def set_debug_log_level() -> None:
    """Set global log level to debug."""
    # Convenience wrapper around set_log_level, used widely in the tests
    set_log_level(logging.DEBUG)
65 |
66 |
# Module-level logger instance shared across the whole package.
log = get_logger()
68 |
--------------------------------------------------------------------------------
/ankipandas/util/misc.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import collections
5 | from typing import Any
6 |
7 |
def invert_dict(dct: dict) -> dict:
    """Invert dictionary, i.e. reverse keys and values.

    Args:
        dct: Dictionary

    Returns:
        Dictionary with reversed keys and values.

    Raises:
        :class:`ValueError` if values are not unique.
    """
    if len(set(dct.values())) != len(dct.values()):
        # Include the offending values in the exception message instead of
        # print()-debugging them to stdout (previous behavior).
        raise ValueError(
            "Dictionary does not seem to be invertible: "
            f"values are not unique: {sorted(dct.values())}"
        )
    return {value: key for key, value in dct.items()}
25 |
26 |
def flatten_list_list(lst: list[list[Any]]) -> list[Any]:
    """Takes a list of lists and returns a list of all elements.

    Args:
        lst: List of Lists

    Returns:
        list
    """
    flat: list[Any] = []
    for sublist in lst:
        flat.extend(sublist)
    return flat
37 |
38 |
def nested_dict():
    """Return a dictionary-like object that automatically creates missing
    levels on access (idea from https://stackoverflow.com/questions/16724788/).

    Example:

    .. code-block:: python

        a = nested_dict()
        a['test']['this']['is']['working'] = "yaaay"
    """
    # A defaultdict whose default factory is this very function, so each
    # missing key transparently spawns another nested level.
    return collections.defaultdict(nested_dict)
52 |
53 |
def defaultdict2dict(defdict: collections.defaultdict) -> dict:
    """Recursively convert a (possibly nested) defaultdict to a plain dict."""
    result = {}
    for key, value in defdict.items():
        if isinstance(value, collections.defaultdict):
            value = defaultdict2dict(value)
        result[key] = value
    return result
63 |
--------------------------------------------------------------------------------
/ankipandas/util/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/ankipandas/util/test/__init__.py
--------------------------------------------------------------------------------
/ankipandas/util/test/test_dataframe.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import unittest
5 |
6 | # 3rd
7 | import pandas as pd
8 |
9 | # ours
10 | from ankipandas.util.dataframe import replace_df_inplace
11 |
12 |
class TestUtils(unittest.TestCase):
    def test__replace_df_inplace(self):
        """Replacement keeps only the new frame's rows and columns."""
        original = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
        replacement = pd.DataFrame({"a": [1]})
        replace_df_inplace(original, replacement)
        self.assertEqual(len(original), 1)
        self.assertEqual(len(original.columns), 1)
        self.assertListEqual(list(original["a"].values), [1])
21 |
22 |
# Allow running this test module directly (outside of pytest).
if __name__ == "__main__":
    unittest.main()
25 |
--------------------------------------------------------------------------------
/ankipandas/util/test/test_log.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import unittest
5 |
6 | # ours
7 | from ankipandas.util.log import get_logger, log, set_log_level
8 |
9 |
class TestLogging(unittest.TestCase):
    """Smoke tests: logging helpers must run without raising."""

    def test_log(self):
        # Module-level logger instance
        log.info("Test info")
        log.warning("Test warning")

    def test_get_logger(self):
        # Freshly fetched logger
        get_logger().info("Test info")
        get_logger().warning("Test warning")

    def test_set_log_level(self):
        # Keyword (either case) and numeric levels are both accepted
        for level in ("warning", "WARNING", 0):
            set_log_level(level)
25 |
26 |
# Allow running this test module directly (outside of pytest).
if __name__ == "__main__":
    unittest.main()
29 |
--------------------------------------------------------------------------------
/ankipandas/util/test/test_misc.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import unittest
5 |
6 | # ours
7 | from ankipandas.util.misc import invert_dict
8 |
9 |
class TestInvertDict(unittest.TestCase):
    def test_ok(self):
        """An injective mapping is inverted key<->value."""
        mapping = {1: 2, 3: 4, 5: 6}
        expected = {2: 1, 4: 3, 6: 5}
        self.assertDictEqual(invert_dict(mapping), expected)

    def test_fails(self):
        """Duplicate values make the mapping non-invertible."""
        non_invertible = {1: 2, 3: 2}
        with self.assertRaises(ValueError):
            invert_dict(non_invertible)
19 |
20 |
# Allow running this test module directly (outside of pytest).
if __name__ == "__main__":
    unittest.main()
23 |
--------------------------------------------------------------------------------
/ankipandas/util/test/test_types.py:
--------------------------------------------------------------------------------
1 | # std
2 | from __future__ import annotations
3 |
4 | import unittest
5 |
6 | from ankipandas.util.log import set_debug_log_level
7 |
8 | # ours
9 | from ankipandas.util.types import (
10 | is_dict_list_like,
11 | is_list_dict_like,
12 | is_list_like,
13 | is_list_list_like,
14 | )
15 |
16 |
class TestTypes(unittest.TestCase):
    """Positive and negative examples for the type-predicate helpers."""

    def setUp(self):
        set_debug_log_level()

    def test_is_list_like(self):
        for candidate in ([1, 2], (1, 2)):
            self.assertTrue(is_list_like(candidate))
        self.assertFalse(is_list_like("asdf"))

    def test_is_list_list_like(self):
        for candidate in ([[1, 2]], [(1, 2)]):
            self.assertTrue(is_list_list_like(candidate))
        self.assertFalse(is_list_list_like([(1, 2), 3]))

    def test_is_list_dict_like(self):
        for candidate in ([{1: 3}, {4: 5}], []):
            self.assertTrue(is_list_dict_like(candidate))
        self.assertFalse(is_list_dict_like([(1, 2), (4, 5)]))

    def test_is_dict_list_like(self):
        for candidate in ({1: [], 2: (3, 4)}, {}):
            self.assertTrue(is_dict_list_like(candidate))
        self.assertFalse(is_dict_list_like([(1, 2), (4, 5)]))
40 |
41 |
# Allow running this test module directly (outside of pytest).
if __name__ == "__main__":
    unittest.main()
44 |
--------------------------------------------------------------------------------
/ankipandas/util/types.py:
--------------------------------------------------------------------------------
def is_list_like(obj):
    """True if object type is similar to list, tuple etc."""
    return isinstance(obj, tuple) or isinstance(obj, list)
4 |
5 |
def is_list_list_like(obj):
    """True if object is a list-like object of list-like objects."""
    if not is_list_like(obj):
        return False
    return all(is_list_like(item) for item in obj)
9 |
10 |
def is_list_dict_like(obj):
    """True if object is list-like object of dictionaries."""
    if not is_list_like(obj):
        return False
    return all(isinstance(item, dict) for item in obj)
14 |
15 |
def is_dict_list_like(obj):
    """True if object is dictionary with list-like objects as values."""
    if not isinstance(obj, dict):
        return False
    return all(is_list_like(value) for value in obj.values())
19 |
--------------------------------------------------------------------------------
/ankipandas/version.txt:
--------------------------------------------------------------------------------
1 | 0.3.15
2 |
--------------------------------------------------------------------------------
/codespell.txt:
--------------------------------------------------------------------------------
1 | hist
2 | inpt
3 | assertIn
4 |
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 |
22 | .PHONY: help
23 | help:
	@echo "Please use \`make <target>' where <target> is one of"
25 | @echo " html to make standalone HTML files"
26 | @echo " dirhtml to make HTML files named index.html in directories"
27 | @echo " singlehtml to make a single large HTML file"
28 | @echo " pickle to make pickle files"
29 | @echo " json to make JSON files"
30 | @echo " htmlhelp to make HTML files and a HTML help project"
31 | @echo " qthelp to make HTML files and a qthelp project"
32 | @echo " applehelp to make an Apple Help Book"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
36 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
38 | @echo " text to make text files"
39 | @echo " man to make manual pages"
40 | @echo " texinfo to make Texinfo files"
41 | @echo " info to make Texinfo files and run them through makeinfo"
42 | @echo " gettext to make PO message catalogs"
43 | @echo " changes to make an overview of all changed/added/deprecated items"
44 | @echo " xml to make Docutils-native XML files"
45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
46 | @echo " linkcheck to check all external links for integrity"
47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
48 | @echo " coverage to run coverage check of the documentation (if enabled)"
49 |
50 | .PHONY: clean
51 | clean:
52 | rm -rf $(BUILDDIR)/*
53 |
54 | .PHONY: html
55 | html:
56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
57 | @echo
58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
59 |
60 | .PHONY: dirhtml
61 | dirhtml:
62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
63 | @echo
64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
65 |
66 | .PHONY: singlehtml
67 | singlehtml:
68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
69 | @echo
70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
71 |
72 | .PHONY: pickle
73 | pickle:
74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
75 | @echo
76 | @echo "Build finished; now you can process the pickle files."
77 |
78 | .PHONY: json
79 | json:
80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
81 | @echo
82 | @echo "Build finished; now you can process the JSON files."
83 |
84 | .PHONY: htmlhelp
85 | htmlhelp:
86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
87 | @echo
88 | @echo "Build finished; now you can run HTML Help Workshop with the" \
89 | ".hhp project file in $(BUILDDIR)/htmlhelp."
90 |
# Build Qt help files. Fixed: the help text previously referenced
# "BDecaysKinematicClustering", a leftover from another project; this
# project's basename is "AnkiPandas" (see htmlhelp_basename in doc/conf.py).
.PHONY: qthelp
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/AnkiPandas.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/AnkiPandas.qhc"
100 |
# Build an Apple Help Book (macOS only; the result must be installed to be
# viewable, as the echo below explains).
.PHONY: applehelp
applehelp:
	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
	@echo
	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
	@echo "N.B. You won't be able to view it unless you put it in" \
	"~/Library/Documentation/Help or install it in your application" \
	"bundle."
# Build GNOME Devhelp files. Fixed: the install instructions previously
# referenced "BDecaysKinematicClustering", a leftover from another project;
# this project's basename is "AnkiPandas" (see doc/conf.py).
.PHONY: devhelp
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/AnkiPandas"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/AnkiPandas"
	@echo "# devhelp"
119 |
# Print-oriented and text-oriented builders (epub, LaTeX/PDF, plain text,
# man pages, Texinfo). The latexpdf* targets recurse into the generated
# LaTeX directory via $(MAKE).
.PHONY: epub
epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

.PHONY: latex
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	"(use \`make latexpdf' here to do that automatically)."

.PHONY: latexpdf
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

.PHONY: latexpdfja
latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

.PHONY: text
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

.PHONY: man
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

.PHONY: texinfo
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	"(use \`make info' here to do that automatically)."
# Build Texinfo files and run them through makeinfo. Fixed: the recursive
# invocation now uses $(MAKE) instead of a bare "make" so that the jobserver
# and command-line flags propagate, consistent with the latexpdf targets.
.PHONY: info
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
174 |
# Auxiliary builders. Note that gettext uses $(I18NSPHINXOPTS) rather than
# $(ALLSPHINXOPTS), since message catalogs are extracted per language.
.PHONY: gettext
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

.PHONY: changes
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

.PHONY: linkcheck
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	"or in $(BUILDDIR)/linkcheck/output.txt."

.PHONY: doctest
doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	"results in $(BUILDDIR)/doctest/output.txt."

.PHONY: coverage
coverage:
	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
	@echo "Testing of coverage in the sources finished, look at the " \
	"results in $(BUILDDIR)/coverage/python.txt."

.PHONY: xml
xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

.PHONY: pseudoxml
pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
217 |
--------------------------------------------------------------------------------
/doc/ankidf.rst:
--------------------------------------------------------------------------------
1 | AnkiDataFrame
2 | ^^^^^^^^^^^^^
3 |
4 | The class :class:`~ankipandas.ankidf.AnkiDataFrame` is the central data
5 | structure in which we provide the notes, cards and review tables.
6 | Access it via an instance of :class:`~ankipandas.collection.Collection`.
7 |
8 | Example:
9 |
10 | .. code-block:: python
11 |
12 | from ankipandas import Collection
13 | col = Collection()
14 |
15 | col.notes # Notes as AnkiDataFrame
16 | col.cards # Cards as AnkiDataFrame
17 | col.revs # Reviews as AnkiDataFrame
18 |
19 | .. autoclass:: ankipandas.ankidf.AnkiDataFrame
20 | :members:
21 | :undoc-members:
22 | :exclude-members: equals, update, append
23 |
--------------------------------------------------------------------------------
/doc/collection.rst:
--------------------------------------------------------------------------------
1 | Collection
2 | ^^^^^^^^^^
3 |
4 | This is the starting point for most end-users.
5 | The :class:`~ankipandas.collection.Collection` class loads the Anki collection
6 | and provides access to its notes, cards and reviews as pandas
7 | :class:`~pandas.DataFrame` objects.
8 |
9 | .. autoclass:: ankipandas.collection.Collection
10 | :members:
11 | :undoc-members:
12 |
--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
from __future__ import annotations

import os
import pathlib
import sys
from pathlib import Path

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath("../"))

# Copy the repository README into the doc tree (as doc/readme.md) with an
# rst-friendly "Readme" title prepended, so it can appear in the toctree.
readme_path = Path(__file__).parent.resolve().parent / "README.md"
readme_target = Path(__file__).parent / "readme.md"

# BUG FIX: as previously written, the README body was written AFTER the
# `with` block had exited, i.e. to an already-closed file handle. All writes
# now happen inside the context manager. The manual line-by-line copy was
# also equivalent to writing read_text() directly.
with readme_target.open("w") as outf:
    outf.write(
        "\n".join(
            [
                "Readme",
                "======",
                "",
            ]
        )
    )
    outf.write(readme_path.read_text())
31 |
# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'

# No modules need to be faked for autodoc at the moment.
autodoc_mock_imports = []

autodoc_default_options = {
    "special-members": "__init__",
    "undoc-members": True,
    "show-inheritance": True,
}

autodoc_inherit_docstrings = False

autoclass_content = "class"

# https://stackoverflow.com/questions/12772927/
# Will only work for sphinx > 1.4 onward
suppress_warnings = ["image.nonlocal_uri"]

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.todo",
    "sphinx.ext.coverage",
    "sphinx.ext.mathjax",
    "sphinx.ext.ifconfig",
    "sphinx.ext.viewcode",
    "sphinx.ext.napoleon",
    "sphinx.ext.intersphinx",
    "recommonmark",
]

# NOTE(review): the pandas inventory URL below is plain http and points to the
# old travis-built docs; consider https://pandas.pydata.org/docs/ — verify.
intersphinx_mapping = {
    "python": ("https://docs.python.org/3", None),
    "pandas": ("http://pandas-docs.github.io/pandas-docs-travis/", None),
}

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
source_suffix = [".rst", ".md"]
# source_suffix = ".rst"

# The encoding of source files.
# source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = "index"

# General information about the project.
project = "AnkiPandas"
copyright = "2019, Kilian Lieret"
author = "Kilian Lieret"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#

# Read the single source of truth for the version number from the package.
this_dir = pathlib.Path(__file__).resolve().parent
with (this_dir / ".." / "ankipandas" / "version.txt").open() as vf:
    version = vf.read().strip()
print(f"Version as read from version.txt: '{version}'")

# The short X.Y version.
# version = 'dev'
# The full version, including alpha/beta/rc tags.
release = version

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
# NOTE(review): Sphinx >= 5 warns on language=None and expects a string
# such as "en" — consider updating; verify against the pinned Sphinx version.
language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ["_build"]

# The reST default role (used for this markup: `text`) to use for all
# documents.
# default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True

# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
# html_theme = 'alabaster'

# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = ["_themes"]
159 |
# Prefer sphinx_book_theme when it is installed; otherwise fall back to the
# builtin default theme so documentation can still be built locally.
# Improvement: a direct `import sphinx_book_theme` replaces the previous
# roundabout importlib.import_module("sphinx_book_theme") call — same
# ImportError fallback, clearer intent.
try:
    import sphinx_book_theme

    html_theme = "sphinx_book_theme"
    # NOTE(review): newer sphinx_book_theme releases dropped
    # get_html_theme_path(); verify against the pinned theme version.
    html_theme_path = [sphinx_book_theme.get_html_theme_path()]
except ImportError:
    print(
        "**** WARNING ****: reverting to default theme, because "
        "sphinx_book_theme is not installed"
    )
    html_theme = "default"
print(f"html_theme='{html_theme}'")

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# html_theme_options = {}

# "logo_only" is only understood by sphinx_rtd_theme; other themes get no
# extra options.
if html_theme == "sphinx_rtd_theme":
    html_theme_options = {"logo_only": True}
else:
    html_theme_options = {}
print(f"html_theme_options={html_theme_options}")

# The RTD theme gets the logo variant with extra padding; all others get the
# plain logo.
if html_theme == "sphinx_rtd_theme":
    html_logo = "../misc/logo/logo_rtd.svg"
else:
    html_logo = "../misc/logo/logo.svg"
189 |
# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
# html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
# html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.

# The name of an image file (relative to this directory) to use as a favicon of
# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']


# Output file base name for HTML help builder.
htmlhelp_basename = "AnkiPandas"

# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    # 'preamble': '',
    # Latex figure (float) alignment
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "ankipandas.tex", "AnkiPandas", "Kilian Lieret", "manual")
]

# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "ankipandas", "AnkiPandas", [author], 1)]

# If true, show URL addresses after external links.
# man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
    (
        master_doc,
        "ankipandas",
        "AnkiPandas",
        author,
        "AnkiPandas",
        "Load your anki database as a pandas DataFrame with just one "
        "line of code!",
        "Miscellaneous",
    )
]

# Documents to append as an appendix to all manuals.
# texinfo_appendices = []

# If false, no module index is generated.
# texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False


# -- Options for Epub output ----------------------------------------------

# Bibliographic Dublin Core info (reuses project/author defined above).
epub_title = project
epub_author = author
epub_publisher = author
epub_copyright = copyright

# The basename for the epub file. It defaults to the project name.
# epub_basename = project

# The HTML theme for the epub output. Since the default themes are not
# optimized for small screen space, using the same theme for HTML and epub
# output is usually not wise. This defaults to 'epub', a theme designed to save
# visual space.
# epub_theme = 'epub'

# The language of the text. It defaults to the language option
# or 'en' if the language is not set.
# epub_language = ''

# The scheme of the identifier. Typical schemes are ISBN or URL.
# epub_scheme = ''

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
# epub_identifier = ''

# A unique identification for the text.
# epub_uid = ''

# A tuple containing the cover image and cover page html template filenames.
# epub_cover = ()

# A sequence of (type, uri, title) tuples for the guide element of content.opf.
# epub_guide = ()

# HTML files that should be inserted before the pages created by sphinx.
# The format is a list of tuples containing the path and title.
# epub_pre_files = []

# HTML files that should be inserted after the pages created by sphinx.
# The format is a list of tuples containing the path and title.
# epub_post_files = []

# A list of files that should not be packed into the epub file.
epub_exclude_files = ["search.html"]

# The depth of the table of contents in toc.ncx.
# epub_tocdepth = 3

# Allow duplicate toc entries.
# epub_tocdup = True

# Choose between 'default' and 'includehidden'.
# epub_tocscope = 'default'

# Fix unsupported image types using the Pillow.
# epub_fix_images = False

# Scale large images.
# epub_max_image_width = 0

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# epub_show_urls = 'inline'

# If false, no index is generated.
# epub_use_index = True

# Do not order alphabetically but by source
autodoc_member_order = "bysource"
--------------------------------------------------------------------------------
/doc/examples.rst:
--------------------------------------------------------------------------------
1 | Analysis
2 | --------
3 |
4 | .. note::
5 |
6 | All examples assume the line
7 |
8 | .. code-block:: python
9 |
10 | col = Collection()
11 |
12 | Or ``col = Collection("/path/to/col.anki2")``, etc.
13 |
14 | In which deck are the most leeches?
15 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16 |
17 | .. literalinclude:: examples/examples/leeches_per_deck.py
18 | :linenos:
19 |
20 | .. image:: examples/plots/leeches_per_deck.png
21 |
22 | Which deck has the longest average retention rates?
23 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
24 |
25 | .. literalinclude:: examples/examples/retention_rate_per_deck.py
26 | :linenos:
27 |
28 | .. image:: examples/plots/retention_rate_per_deck.png
29 |
30 | Repetitions vs type
31 | ^^^^^^^^^^^^^^^^^^^
32 |
33 | Minimal:
34 |
35 | .. code-block:: python
36 |
37 |     col.cards.hist("creps", by="ctype")
38 |
39 | Prettier:
40 |
41 | .. literalinclude:: examples/examples/repetitions_per_type.py
42 | :linenos:
43 |
44 | .. image:: examples/plots/repetitions_per_type.png
45 |
46 | Repetitions vs deck
47 | ^^^^^^^^^^^^^^^^^^^
48 |
49 | One liner:
50 |
51 | .. code-block:: python
52 |
53 | col.cards.hist(column="creps", by="cdeck")
54 |
55 | Prettier:
56 |
57 | .. literalinclude:: examples/examples/repetitions_per_deck.py
58 | :linenos:
59 |
60 | .. image:: examples/plots/repetitions_per_deck.png
61 |
62 | Retention distribution vs deck
63 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
64 |
65 | .. literalinclude:: examples/examples/retention_distribution_vs_deck.py
66 | :linenos:
67 |
68 | .. image:: examples/plots/retention_distribution_vs_deck.png
69 |
70 | Reviews vs retention length vs deck
71 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
72 |
73 | .. literalinclude:: examples/examples/reviews_vs_ease.py
74 | :linenos:
75 |
76 | .. image:: examples/plots/reviews_vs_ease.png
77 |
--------------------------------------------------------------------------------
/doc/examples/col.anki2:
--------------------------------------------------------------------------------
1 | /home/fuchur/.local/share/Anki2/fuchurMain/collection.anki2
--------------------------------------------------------------------------------
/doc/examples/examples/leeches_per_deck.py:
--------------------------------------------------------------------------------
# Pie chart of how many leech-tagged cards each deck contains.
# (`col` is injected by the example loader before this script is exec'd.)
merged = col.cards.merge_notes()
leech_counts = merged[merged.has_tag("leech")]["cdeck"].value_counts()
leech_counts.plot.pie(title="Leeches per deck")
4 |
--------------------------------------------------------------------------------
/doc/examples/examples/repetitions_per_deck.py:
--------------------------------------------------------------------------------
# Per-deck histograms of review counts, skipping the archived physics deck.
# (`col` is injected by the example loader before this script is exec'd.)
decks_to_plot = list(col.cards.cdeck.unique())
decks_to_plot.remove("archived::physics")
subset = col.cards[col.cards.cdeck.isin(decks_to_plot)]
axes_grid = subset.hist(
    column="creps",
    by="cdeck",
    sharex=True,
    layout=(5, 4),
    figsize=(15, 15),
    density=True,
)
for axes_row in axes_grid:
    for axis in axes_row:
        axis.set_xlabel("#Reviews")
        axis.set_ylabel("Count")
--------------------------------------------------------------------------------
/doc/examples/examples/repetitions_per_type.py:
--------------------------------------------------------------------------------
# Side-by-side histograms of review counts, split by card type.
# (`col` is injected by the example loader before this script is exec'd.)
axes = col.cards.hist(column="creps", by="ctype", layout=(1, 2), figsize=(12, 3))
for axis in axes:
    axis.set_xlabel("#Reviews")
    axis.set_ylabel("Count")
5 |
--------------------------------------------------------------------------------
/doc/examples/examples/retention_distribution_vs_deck.py:
--------------------------------------------------------------------------------
from __future__ import annotations

import matplotlib.pyplot as plt
import numpy as np

# Overlay step histograms of current review intervals ("civl") for every deck
# with at least 1000 cards. (`col` is injected by the example loader.)
ax = plt.gca()
interval_bins = np.linspace(0, 365, 10)
for deck_name in col.cards.cdeck.unique():
    intervals = col.cards[col.cards.cdeck == deck_name]["civl"]
    if len(intervals) < 1000:
        # Skip small decks to keep the plot readable.
        continue
    intervals.plot.hist(
        ax=ax,
        label=deck_name,
        histtype="step",
        linewidth=2,
        xlim=(0, 365),
        bins=interval_bins,
    )
ax.set_xlabel("Predicted retention length (review interval)")
ax.set_ylabel("Number of cards")
ax.set_title("Expected retention length per deck [days]")
ax.legend(frameon=False)
--------------------------------------------------------------------------------
/doc/examples/examples/retention_rate_per_deck.py:
--------------------------------------------------------------------------------
# Horizontal bar chart of the mean review interval ("civl") for the top decks.
# (`col` is injected by the example loader before this script is exec'd.)
# Fixed: select the "civl" column BEFORE aggregating — calling .mean() on the
# whole grouped frame raises on non-numeric columns in pandas >= 2.0.
grouped = col.cards.groupby("cdeck")
data = grouped["civl"].mean().sort_values().tail()
ax = data.plot.barh()
ax.set_ylabel("Deck name")
ax.set_xlabel("Average expected retention length/review interval [days]")
ax.set_title("Average retention length per deck")
7 |
--------------------------------------------------------------------------------
/doc/examples/examples/reviews_vs_ease.py:
--------------------------------------------------------------------------------
from __future__ import annotations

import pandas as pd

# For each deck with >= 500 cards, quantile-bin the cards by review count and
# plot the mean review interval per bin.
# NOTE(review): `plt` is not imported here; like `col`, it comes from the
# loader's exec namespace — confirm before running standalone.
xs = []
ys = []
decks = []
for deck_name in col.cards.cdeck.unique():
    deck_cards = col.cards[col.cards["cdeck"] == deck_name]
    if len(deck_cards) < 500:
        continue
    decks.append(deck_name)
    rep_bins = pd.qcut(deck_cards["creps"], 15, duplicates="drop")
    mean_intervals = deck_cards.groupby(rep_bins)["civl"].mean()
    # x positions are the midpoints of the quantile bins.
    xs.append(mean_intervals.index.map(lambda interval: interval.mid).tolist())
    ys.append(mean_intervals.tolist())

ax = plt.gca()
for x_vals, y_vals, deck_name in zip(xs, ys, decks):
    ax.plot(x_vals, y_vals, "o-", label=deck_name)
ax.set_xlabel("#Reviews")
ax.set_ylabel("Expected retention length/review interval [days]")
ax.set_title("Number of reviews vs retention length")
ax.legend(frameon=False)
27 |
--------------------------------------------------------------------------------
/doc/examples/loader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # std
4 | from __future__ import annotations
5 |
6 | import logging
7 | import os
8 | import sys
9 | from pathlib import Path
10 |
11 | # 3rd
12 | import matplotlib.pyplot as plt
13 |
14 | # ours
15 | sys.path.insert(0, "../..")
16 | import ankipandas # noqa E402
17 | from ankipandas.util.log import get_logger # noqa E402
18 |
19 |
class Loader:
    """Run every example script against the example collection and save the
    matplotlib figure each one produces into the plots directory."""

    def __init__(self):
        # All paths are resolved relative to this file's directory.
        this_dir = Path(__file__).parent
        self.col_path = this_dir / "col.anki2"
        self.examples_dir = this_dir / "examples"
        self.output_dir = this_dir / "plots"
        self.log = get_logger()
        self.log.setLevel(logging.DEBUG)

    def get_examples(self):
        """Return all example script files found recursively in the examples
        directory (improved: pathlib.rglob replaces the manual os.walk loop)."""
        return [p for p in self.examples_dir.rglob("*") if p.is_file()]

    def run_example(self, path: Path, save=True):
        """Execute one example script and optionally save the resulting plot.

        The script source is run with exec(); the freshly loaded collection
        is available to it as the local variable ``col``.
        NOTE: exec() of repository-controlled example files only — never point
        this at untrusted input.
        """
        self.log.info("Running example %s", path)
        col = ankipandas.Collection(self.col_path)  # noqa F841
        with path.open("r") as example_file:
            exec(example_file.read())
        if save:
            out = self.output_dir.resolve() / (path.resolve().stem + ".png")
            self.log.info("Plotting to %s", out)
            plt.savefig(out, bbox_inches="tight", transparent=True, dpi=75)
        # Reset matplotlib state so examples don't leak into each other.
        plt.cla()
        plt.clf()
        plt.close()

    def run_all(self, **kwargs):
        """Run every discovered example; kwargs are forwarded to run_example."""
        for example in self.get_examples():
            self.run_example(example, **kwargs)
53 |
# Script entry point: run all examples when invoked directly.
if __name__ == "__main__":
    Loader().run_all()
57 |
--------------------------------------------------------------------------------
/doc/examples/plots/leeches_per_deck.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/leeches_per_deck.png
--------------------------------------------------------------------------------
/doc/examples/plots/repetitions_per_deck.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/repetitions_per_deck.png
--------------------------------------------------------------------------------
/doc/examples/plots/repetitions_per_type.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/repetitions_per_type.png
--------------------------------------------------------------------------------
/doc/examples/plots/retention_distribution_vs_deck.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/retention_distribution_vs_deck.png
--------------------------------------------------------------------------------
/doc/examples/plots/retention_rate_per_deck.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/retention_rate_per_deck.png
--------------------------------------------------------------------------------
/doc/examples/plots/reviews_vs_ease.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/reviews_vs_ease.png
--------------------------------------------------------------------------------
/doc/faq.md:
--------------------------------------------------------------------------------
1 | Questions & Answers
2 | ===================
3 |
4 | > What do the different columns mean?
5 |
6 | Please use the `help`, `help_cols`, or `help_col` methods of the `AnkiDataFrame`
7 | object to display information about the columns.
8 |
9 | > How to get the creation time of a card/note?
10 |
11 | The IDs of the cards/notes correspond to the creation time.
12 | See [issue #112](https://github.com/klieret/AnkiPandas/issues/112) for a small
13 | code snippet to convert it to a ``datetime`` object.
14 |
15 | > Can I access deck settings (e.g., the card intake per day) from `ankipandas`?
16 |
17 | This is currently not supported by `ankipandas`. However, you can find related
18 | discussion in [issue #113](https://github.com/klieret/AnkiPandas/issues/113).
19 |
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 | AnkiPandas
2 | ==========
3 |
4 | Load your anki database as a pandas DataFrame with just one
5 | line of code!
6 |
7 | .. toctree::
8 | :maxdepth: 2
9 | :caption: Basics
10 |
11 | readme
12 | troubleshooting
13 | faq
14 |
15 | .. toctree::
16 | :maxdepth: 2
17 | :caption: Examples
18 |
19 | examples
20 | projects_with_ap
21 |
22 | .. toctree::
23 | :maxdepth: 2
24 | :caption: Module documentation
25 |
26 | collection
27 | ankidf
28 | paths
29 | raw
30 | util
31 |
--------------------------------------------------------------------------------
/doc/paths.rst:
--------------------------------------------------------------------------------
1 | Paths and Searching
2 | ^^^^^^^^^^^^^^^^^^^
3 |
4 |
5 | .. automodule:: ankipandas.paths
6 | :members:
7 | :undoc-members:
8 |
--------------------------------------------------------------------------------
/doc/projects_with_ap.rst:
--------------------------------------------------------------------------------
1 | Projects that use AnkiPandas
2 | ----------------------------
3 |
4 | .. note::
5 |
6 |     To add your project, please open an `issue <https://github.com/klieret/AnkiPandas/issues>`_.
7 |
8 | * `cryptocoinserver/AnkiCardSimilarity.py `_ A script that checks for similar cards in a deck and adds tags to them. It uses TF-IDF and cosine similarity to calculate the similarity between cards.
9 | * `thiswillbeyourgithub/DocToolsLLM `_: AI powered Q&A on any kind of document or combination of documents, including entire anki collections, pdfs, videos, youtube playlists etc.
10 | * `thiswillbeyourgithub/Anki-Semantic-Search `_: Use AI (fastText's multilingual word vectors) to search through your anki notes!
11 |
--------------------------------------------------------------------------------
/doc/raw.rst:
--------------------------------------------------------------------------------
1 | Internals
2 | ^^^^^^^^^
3 |
4 |
5 | .. automodule:: ankipandas.raw
6 | :members:
7 | :undoc-members:
8 |
--------------------------------------------------------------------------------
/doc/requirements.txt:
--------------------------------------------------------------------------------
1 | recommonmark
2 | sphinx_book_theme
3 | git+https://github.com/klieret/AnkiPandas.git
4 |
--------------------------------------------------------------------------------
/doc/troubleshooting.rst:
--------------------------------------------------------------------------------
1 | Troubleshooting
2 | ---------------
3 |
4 | Getting help
5 | ^^^^^^^^^^^^
6 |
7 | Submit an `issue on github`_. Thank you for
8 | improving this toolkit with me!
9 |
10 | .. _issue on github: https://github.com/klieret/ankipandas/issues
11 |
12 | Common problems
13 | ^^^^^^^^^^^^^^^
14 |
15 | * **Locked database**: While Anki is running, your database will be locked and
16 |   you might not be able to access it. Simply close Anki and try again. Similarly,
17 | Anki might refuse to open the database if ``ankipandas`` has currently opened
18 | it (be it in a Jupyter notebook or in a currently running project).
19 |
20 | .. note::
21 |
22 |     Did you run into (and solve) a problem that is not listed here? Help others
23 |     by bringing it to my attention_. Please first check whether an issue has
24 |     already been created for it by going through this list_.
25 |
26 | .. _attention: https://github.com/klieret/ankipandas/issues
27 | .. _list: https://github.com/klieret/AnkiPandas/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc+label%3Abug
28 |
29 |
30 | Debugging
31 | ^^^^^^^^^
32 |
33 | For better debugging, you can increase the log level of ``ankipandas``:
34 |
35 | .. code-block:: python
36 |
37 | ankipandas.set_log_level("debug")
38 |
--------------------------------------------------------------------------------
/doc/util.rst:
--------------------------------------------------------------------------------
1 | Utilities
2 | ^^^^^^^^^
3 |
4 | .. automodule:: ankipandas.util
5 |
6 | .. automodule:: ankipandas.util.log
7 | :members:
8 | :undoc-members:
9 |
10 | .. automodule:: ankipandas.util.dataframe
11 | :members:
12 | :undoc-members:
13 |
14 | .. automodule:: ankipandas.util.misc
15 | :members:
16 | :undoc-members:
17 |
18 | .. automodule:: ankipandas.util.checksum
19 | :members:
20 | :undoc-members:
21 |
--------------------------------------------------------------------------------
/misc/logo/logo_github.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/misc/logo/logo_github.png
--------------------------------------------------------------------------------
/mlc_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "ignorePatterns": [
3 | {
4 | "pattern": "https://github.com/issues?.*"
5 | }
6 | ]
7 | }
8 |
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | ignore_missing_imports = True
3 | follow_imports = silent
4 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 80
3 | target-version = ["py37"]
4 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | colorlog
3 | randomfiletree
4 | numpy
5 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = ankipandas
3 | description = Load your anki database as a pandas DataFrame with just one line of code!
4 | url = https://github.com/klieret/ankipandas
5 | version = file: ankipandas/version.txt
6 | long_description = file: README.md
7 | long_description_content_type = text/markdown
8 | license = MIT
9 | license_file = LICENSE.txt
10 | python_requires = >=3.7
11 | project_urls =
12 | Bug Tracker = https://github.com/klieret/ankipandas/issues
13 | Documentation = https://ankipandas.readthedocs.io/
14 | Source Code = https://github.com/klieret/ankipandas/
15 | keywords =
16 | anki
17 | pandas
18 | dataframe
19 | classifiers =
20 | Programming Language :: Python :: 3
21 | Programming Language :: Python :: 3.7
22 | Programming Language :: Python :: 3.8
23 | Programming Language :: Python :: 3.9
24 | Programming Language :: Python :: 3.10
25 | License :: OSI Approved :: MIT License
26 | Operating System :: OS Independent
27 | Topic :: Database
28 | Topic :: Education
29 | Topic :: Utilities
30 |
31 | [options]
32 | packages = find:
33 | include_package_data = True
34 | zip_safe = False
35 |
36 |
37 | [tool:pytest]
38 | addopts = --cov=ankipandas
39 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """ To install this package, change to the directory of this file and run
4 |
5 | pip3 install --user .
6 |
7 | (the ``--user`` flag installs the package for your user account only, otherwise
8 | you will need administrator rights).
9 | """
10 |
11 | from __future__ import annotations
12 |
13 | # std
14 | import site
15 | import sys
16 | from pathlib import Path
17 |
18 | # noinspection PyUnresolvedReferences
19 | import setuptools
20 |
21 | # Sometimes editable install fails with an error message about user site
22 | # being not writeable. The following line can fix that, see
23 | # https://github.com/pypa/pip/issues/7953
24 | site.ENABLE_USER_SITE = "--user" in sys.argv[1:]
25 |
26 | this_dir = Path(__file__).resolve().parent
27 |
28 | setuptools.setup(
29 | install_requires=[
30 | req.strip()
31 | for req in (this_dir / "requirements.txt").read_text().splitlines()
32 | if req.strip() and not req.startswith("#")
33 | ]
34 | )
35 |
--------------------------------------------------------------------------------