├── .all-contributorsrc
├── .coveragerc
├── .flake8
├── .github
    ├── ISSUE_TEMPLATE.md
    └── workflows
    │   ├── check-links.yaml
    │   ├── codeql.yml
    │   ├── release.yaml
    │   └── testing.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CHANGELOG.md
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── ankipandas
    ├── __init__.py
    ├── _columns.py
    ├── ankidf.py
    ├── collection.py
    ├── conftest.py
    ├── data
    │   └── anki_fields.csv
    ├── paths.py
    ├── raw.py
    ├── test
    │   ├── __init__.py
    │   ├── data
    │   │   └── few_basic_cards
    │   │   │   ├── collection.anki2
    │   │   │   └── collection_v1.anki2
    │   ├── test_ankidf.py
    │   ├── test_collection.py
    │   ├── test_paths.py
    │   ├── test_raw.py
    │   ├── test_regression.py
    │   └── util.py
    ├── util
    │   ├── __init__.py
    │   ├── checksum.py
    │   ├── dataframe.py
    │   ├── guid.py
    │   ├── log.py
    │   ├── misc.py
    │   ├── test
    │   │   ├── __init__.py
    │   │   ├── test_dataframe.py
    │   │   ├── test_log.py
    │   │   ├── test_misc.py
    │   │   └── test_types.py
    │   └── types.py
    └── version.txt
├── codespell.txt
├── doc
    ├── Makefile
    ├── ankidf.rst
    ├── collection.rst
    ├── conf.py
    ├── examples.rst
    ├── examples
    │   ├── col.anki2
    │   ├── examples
    │   │   ├── leeches_per_deck.py
    │   │   ├── repetitions_per_deck.py
    │   │   ├── repetitions_per_type.py
    │   │   ├── retention_distribution_vs_deck.py
    │   │   ├── retention_rate_per_deck.py
    │   │   └── reviews_vs_ease.py
    │   ├── loader.py
    │   └── plots
    │   │   ├── leeches_per_deck.png
    │   │   ├── repetitions_per_deck.png
    │   │   ├── repetitions_per_type.png
    │   │   ├── retention_distribution_vs_deck.png
    │   │   ├── retention_rate_per_deck.png
    │   │   └── reviews_vs_ease.png
    ├── faq.md
    ├── index.rst
    ├── paths.rst
    ├── projects_with_ap.rst
    ├── raw.rst
    ├── requirements.txt
    ├── troubleshooting.rst
    └── util.rst
├── misc
    └── logo
    │   ├── _logos.svg
    │   ├── logo.svg
    │   ├── logo_github.png
    │   ├── logo_rtd.svg
    │   └── logo_social.svg
├── mlc_config.json
├── mypy.ini
├── pyproject.toml
├── requirements.txt
├── setup.cfg
└── setup.py


/.all-contributorsrc:
--------------------------------------------------------------------------------
  1 | {
  2 |   "files": [
  3 |     "README.md"
  4 |   ],
  5 |   "imageSize": 100,
  6 |   "commit": false,
  7 |   "contributorsSortAlphabetically": true,
  8 |   "contributors": [
  9 |     {
 10 |       "login": "exc4l",
 11 |       "name": "exc4l",
 12 |       "avatar_url": "https://avatars3.githubusercontent.com/u/74188442?v=4",
 13 |       "profile": "https://github.com/exc4l",
 14 |       "contributions": [
 15 |         "bug",
 16 |         "code"
 17 |       ]
 18 |     },
 19 |     {
 20 |       "login": "CalculusAce",
 21 |       "name": "CalculusAce",
 22 |       "avatar_url": "https://avatars3.githubusercontent.com/u/42630988?v=4",
 23 |       "profile": "https://github.com/CalculusAce",
 24 |       "contributions": [
 25 |         "bug"
 26 |       ]
 27 |     },
 28 |     {
 29 |       "login": "brownbat",
 30 |       "name": "Thomas Brownback",
 31 |       "avatar_url": "https://avatars2.githubusercontent.com/u/26754?v=4",
 32 |       "profile": "http://thomasbrownback.com/",
 33 |       "contributions": [
 34 |         "bug"
 35 |       ]
 36 |     },
 37 |     {
 38 |       "login": "p4nix",
 39 |       "name": "p4nix",
 40 |       "avatar_url": "https://avatars1.githubusercontent.com/u/7038116?v=4",
 41 |       "profile": "https://github.com/p4nix",
 42 |       "contributions": [
 43 |         "bug"
 44 |       ]
 45 |     },
 46 |     {
 47 |       "login": "eumiro",
 48 |       "name": "Miroslav Šedivý",
 49 |       "avatar_url": "https://avatars0.githubusercontent.com/u/6774676?v=4",
 50 |       "profile": "https://github.com/eumiro",
 51 |       "contributions": [
 52 |         "test",
 53 |         "code"
 54 |       ]
 55 |     },
 56 |     {
 57 |       "login": "khughitt",
 58 |       "name": "Keith Hughitt",
 59 |       "avatar_url": "https://avatars.githubusercontent.com/u/125001?v=4",
 60 |       "profile": "https://github.com/khughitt",
 61 |       "contributions": [
 62 |         "bug"
 63 |       ]
 64 |     },
 65 |     {
 66 |       "login": "bollwyvl",
 67 |       "name": "Nicholas Bollweg",
 68 |       "avatar_url": "https://avatars.githubusercontent.com/u/45380?v=4",
 69 |       "profile": "https://github.com/bollwyvl",
 70 |       "contributions": [
 71 |         "code"
 72 |       ]
 73 |     },
 74 |     {
 75 |       "login": "eshrh",
 76 |       "name": "eshrh",
 77 |       "avatar_url": "https://avatars.githubusercontent.com/u/16175276?v=4",
 78 |       "profile": "http://esrh.sdf.org",
 79 |       "contributions": [
 80 |         "doc"
 81 |       ]
 82 |     },
 83 |     {
 84 |       "login": "Blocked",
 85 |       "name": "Blocked",
 86 |       "avatar_url": "https://avatars.githubusercontent.com/u/4366503?v=4",
 87 |       "profile": "https://github.com/Blocked",
 88 |       "contributions": [
 89 |         "bug"
 90 |       ]
 91 |     },
 92 |     {
 93 |       "login": "frnsys",
 94 |       "name": "Francis Tseng",
 95 |       "avatar_url": "https://avatars.githubusercontent.com/u/1059947?v=4",
 96 |       "profile": "http://frnsys.com",
 97 |       "contributions": [
 98 |         "bug",
 99 |         "code"
100 |       ]
101 |     }
102 |   ],
103 |   "contributorsPerLine": 7,
104 |   "projectName": "AnkiPandas",
105 |   "projectOwner": "klieret",
106 |   "repoType": "github",
107 |   "repoHost": "https://github.com",
108 |   "skipCi": true,
109 |   "commitConvention": "angular"
110 | }
111 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 |     setup.py
4 |     examples/*
5 |     */test/*
6 |     test_*
7 | relative_files = True
8 | 


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | max-line-length = 80
 3 | select = C,E,F,W,B,B950
 4 | ignore = E203, E501, W503
 5 | per-file-ignores =
 6 |     doc/examples/examples/*: F821
 7 |     */__init__.py: F401
 8 | exclude =
 9 |     .git,
10 |     __pycache__,
11 |     notebooks,
12 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | <summary><strong>Instructions</strong></summary>
 2 | 
 3 | First off, thanks for helping! :heart:
 4 | 
 5 | To make it easier to come up with a solution, please make sure to include
 6 | 
 7 | * Your operating system
 8 | * Please make sure that you set the logging level of ankipandas to debug directly after importing:
 9 |   `ankipandas.set_debug_log_level()`.
10 | * A minimal working example (MWE) of code that can be used to reproduce your problem (where applicable)
11 | * A clear description of what you expected and what happened instead
12 | * The complete output of all log messages of `AnkiPandas`
13 | 
14 | Also note that it might take some time before I can take a look at this.
15 | If I don't reply within a week, please ping me again (e.g. write another comment).
16 | Thanks for your patience!
17 | 
18 | <strong>You may delete these instructions from your comment.</strong>
19 | 


--------------------------------------------------------------------------------
/.github/workflows/check-links.yaml:
--------------------------------------------------------------------------------
 1 | name: Check Markdown links
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 |   schedule:
 7 |     - cron: "0 0 1 * *"
 8 | 
 9 | jobs:
10 |   markdown-link-check:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/checkout@master
14 |       - uses: gaurav-nelson/github-action-markdown-link-check@v1
15 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
 1 | name: "CodeQL"
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "master" ]
 6 |   pull_request:
 7 |     branches: [ "master" ]
 8 |   schedule:
 9 |     - cron: "44 5 * * 5"
10 | 
11 | jobs:
12 |   analyze:
13 |     name: Analyze
14 |     runs-on: ubuntu-latest
15 |     permissions:
16 |       actions: read
17 |       contents: read
18 |       security-events: write
19 | 
20 |     strategy:
21 |       fail-fast: false
22 |       matrix:
23 |         language: [ python ]
24 | 
25 |     steps:
26 |       - name: Checkout
27 |         uses: actions/checkout@v3
28 | 
29 |       - name: Initialize CodeQL
30 |         uses: github/codeql-action/init@v2
31 |         with:
32 |           languages: ${{ matrix.language }}
33 |           queries: +security-and-quality
34 | 
35 |       - name: Autobuild
36 |         uses: github/codeql-action/autobuild@v2
37 | 
38 |       - name: Perform CodeQL Analysis
39 |         uses: github/codeql-action/analyze@v2
40 |         with:
41 |           category: "/language:${{ matrix.language }}"
42 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
 1 | name: Release
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [published]
 6 |   workflow_dispatch:
 7 | 
 8 | jobs:
 9 |   deploy:
10 |     runs-on: ubuntu-latest
11 | 
12 |     steps:
13 |       - uses: actions/checkout@v3
14 |         with:
15 |           fetch-depth: 0
16 | 
17 |       - name: Build sdist and wheel
18 |         run: pipx run build
19 | 
20 |       - uses: actions/upload-artifact@v4
21 |         with:
22 |           path: dist
23 | 
24 |       - name: Check products
25 |         run: pipx run twine check dist/*
26 | 
27 |       - uses: pypa/gh-action-pypi-publish@v1.6.1
28 |         if: github.event_name == 'release' && github.event.action == 'published'
29 |         with:
30 |           # Remember to generate this and set it in "GitHub Secrets"
31 |           password: ${{ secrets.pypi_password }}
32 |           verbose: true
33 | 


--------------------------------------------------------------------------------
/.github/workflows/testing.yaml:
--------------------------------------------------------------------------------
 1 | name: testing
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   build:
 7 |     runs-on: ubuntu-latest
 8 | 
 9 |     steps:
10 |       - uses: actions/checkout@v2
11 |       - name: Set up Python 3.8
12 |         uses: actions/setup-python@v2
13 |         with:
14 |           python-version: 3.8
15 |       - name: Install prerequisites
16 |         run: |
17 |           python3 -m pip install -U pip wheel setuptools
18 |       - name: Build distribution
19 |         run: |
20 |           python3 setup.py sdist bdist_wheel
21 |           cd dist && sha256sum * | tee SHA256SUMS
22 |       - name: Install lint/test dependencies
23 |         run: |
24 |           python3 -m pip install flake8 pytest pytest-cover pytest-subtests
25 |       - name: Lint with flake8
26 |         run: |
27 |           # stop the build if there are Python syntax errors or undefined names
28 |           flake8 . --count --select=E9,F63,F7,F82 --ignore F821 --show-source --statistics
29 |           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
30 |           flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
31 |       - name: Install test dependencies
32 |         run: |
33 |           python3 -m pip install -e .
34 |       - name: Test with pytest
35 |         run: |
36 |           pytest
37 |       - uses: actions/upload-artifact@v4
38 |         with:
39 |           name: ankipandas dist ${{ github.run_number }}
40 |           path: ./dist
41 |       - name: Coveralls
42 |         uses: AndreMiras/coveralls-python-action@develop
43 | 
44 |   test:
45 |     needs: [build]
46 |     runs-on: ${{ matrix.os }}-latest
47 |     strategy:
48 |       matrix:
49 |         os:  [ubuntu, windows, macos]
50 |         python-version: ['3.8', '3.9', '3.10']
51 |         include:
52 |           - python-version: '3.8'
53 |             artifact: ankipandas-*.whl
54 |           - python-version: '3.9'
55 |             artifact: ankipandas-*.tar.gz
56 |           - python-version: '3.10'
57 |             artifact: ankipandas-*.tar.gz
58 | 
59 |     steps:
60 |     - uses: actions/checkout@v2
61 |     - uses: actions/download-artifact@v4.1.7
62 |       with:
63 |         name: ankipandas dist ${{ github.run_number }}
64 |         path: ./dist
65 |     - name: Set up Python ${{ matrix.python-version }}
66 |       uses: actions/setup-python@v2
67 |       with:
68 |         python-version: ${{ matrix.python-version }}
69 |     - name: Install prerequisites
70 |       run: |
71 |         python3 -m pip install -U pip wheel
72 |     - name: Get artifact path
73 |       id: artifact
74 |       shell: bash -l {0}
75 |       run: |
76 |         cd dist
77 |         echo "::set-output name=path::$(ls ${{ matrix.artifact }})"
78 |     - name: Install package
79 |       run: |
80 |         cd dist
81 |         python3 -m pip install ${{ steps.artifact.outputs.path }}
82 |     - name: Smoke test
83 |       run: |
84 |         cd dist
85 |         python3 -m pip list
86 |         python3 -m pip check
87 |         python3 -c "import ankipandas"
88 |     - name: Install test dependencies
89 |       run: |
90 |         python3 -m pip install pytest pytest-cover pytest-subtests
91 |     - name: Test with pytest
92 |       run: |
93 |         cd dist
94 |         pytest --pyargs ankipandas --cov ankipandas --cov-report term-missing:skip-covered
95 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | todo.txt
  2 | *.swp
  3 | .idea/**
  4 | doc/_build/**
  5 | # gets copied:
  6 | doc/readme.md
  7 | .~*
  8 | 
  9 | examples/**
 10 | examples/jupyter_notebooks/tests.ipynb
 11 | 
 12 | # Created by https://www.gitignore.io/api/python
 13 | # Edit at https://www.gitignore.io/?templates=python
 14 | 
 15 | *.ipynb
 16 | 
 17 | ### Python ###
 18 | # Byte-compiled / optimized / DLL files
 19 | __pycache__/
 20 | *.py[cod]
 21 | *$py.class
 22 | 
 23 | # C extensions
 24 | *.so
 25 | 
 26 | # Distribution / packaging
 27 | .Python
 28 | build/
 29 | develop-eggs/
 30 | dist/
 31 | downloads/
 32 | eggs/
 33 | .eggs/
 34 | lib/
 35 | lib64/
 36 | parts/
 37 | sdist/
 38 | var/
 39 | wheels/
 40 | pip-wheel-metadata/
 41 | share/python-wheels/
 42 | *.egg-info/
 43 | .installed.cfg
 44 | *.egg
 45 | MANIFEST
 46 | 
 47 | # PyInstaller
 48 | #  Usually these files are written by a python script from a template
 49 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 50 | *.manifest
 51 | *.spec
 52 | 
 53 | # Installer logs
 54 | pip-log.txt
 55 | pip-delete-this-directory.txt
 56 | 
 57 | # Unit test / coverage reports
 58 | htmlcov/
 59 | .tox/
 60 | .nox/
 61 | .coverage
 62 | .coverage.*
 63 | .cache
 64 | nosetests.xml
 65 | coverage.xml
 66 | *.cover
 67 | .hypothesis/
 68 | .pytest_cache/
 69 | 
 70 | # Translations
 71 | *.mo
 72 | *.pot
 73 | 
 74 | # Django stuff:
 75 | *.log
 76 | local_settings.py
 77 | db.sqlite3
 78 | 
 79 | # Flask stuff:
 80 | instance/
 81 | .webassets-cache
 82 | 
 83 | # Scrapy stuff:
 84 | .scrapy
 85 | 
 86 | # Sphinx documentation
 87 | docs/_build/
 88 | 
 89 | # PyBuilder
 90 | target/
 91 | 
 92 | # Jupyter Notebook
 93 | .ipynb_checkpoints
 94 | 
 95 | # IPython
 96 | profile_default/
 97 | ipython_config.py
 98 | 
 99 | # pyenv
100 | .python-version
101 | 
102 | # pipenv
103 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
104 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
105 | #   having no cross-platform support, pipenv may install dependencies that don’t work, or not
106 | #   install all needed dependencies.
107 | #Pipfile.lock
108 | 
109 | # celery beat schedule file
110 | celerybeat-schedule
111 | 
112 | # SageMath parsed files
113 | *.sage.py
114 | 
115 | # Environments
116 | .env
117 | .venv
118 | env/
119 | venv/
120 | ENV/
121 | env.bak/
122 | venv.bak/
123 | 
124 | # Spyder project settings
125 | .spyderproject
126 | .spyproject
127 | 
128 | # Rope project settings
129 | .ropeproject
130 | 
131 | # mkdocs documentation
132 | /site
133 | 
134 | # mypy
135 | .mypy_cache/
136 | .dmypy.json
137 | dmypy.json
138 | 
139 | # Pyre type checker
140 | .pyre/
141 | 
142 | # End of https://www.gitignore.io/api/python
143 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/psf/black
 3 |     rev: 24.10.0
 4 |     hooks:
 5 |       - id: black
 6 | 
 7 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 8 |     rev: v5.0.0
 9 |     hooks:
10 |       - id: check-added-large-files
11 |       - id: check-case-conflict
12 |       - id: check-merge-conflict
13 |       - id: detect-private-key
14 |       - id: end-of-file-fixer
15 |       - id: trailing-whitespace
16 |   - repo: https://github.com/pycqa/isort
17 |     rev: 5.13.2
18 |     hooks:
19 |       - id: isort
20 |         name: isort (python)
21 |         args:
22 |           [
23 |             "--profile",
24 |             "black",
25 |             "-a",
26 |             "from __future__ import annotations",
27 |             "--append-only",
28 |           ]
29 | 
30 |   - repo: https://github.com/PyCQA/flake8
31 |     rev: "7.1.1" # pick a git hash / tag to point to
32 |     hooks:
33 |       - id: flake8
34 |         additional_dependencies: ["flake8-bugbear"]
35 |   - repo: https://github.com/pre-commit/mirrors-mypy
36 |     rev: "v1.13.0" # Use the sha / tag you want to point at
37 |     hooks:
38 |       - id: mypy
39 |         exclude: "conftest.py|^doc/examples/examples/|^doc/conf.py"
40 |   - repo: https://github.com/codespell-project/codespell
41 |     rev: "v2.3.0"
42 |     hooks:
43 |       - id: codespell
44 |         args: ["-I", "codespell.txt"]
45 |   - repo: https://github.com/asottile/pyupgrade
46 |     rev: v3.19.0
47 |     hooks:
48 |       - id: pyupgrade
49 |         args: ["--py37-plus"]
50 | 
51 | 
52 | 
53 | ci:
54 |   autoupdate_schedule: monthly
55 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | build:
 4 |     os: ubuntu-22.04
 5 |     tools:
 6 |       python: "3.10"
 7 | 
 8 | sphinx:
 9 |   configuration: doc/conf.py
10 | 
11 | python:
12 |   install:
13 |     - requirements: doc/requirements.txt
14 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
  4 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
  5 | 
  6 | ## 0.3.15 -- 2023-10-11
  7 | 
  8 | ### Removed
  9 | 
 10 | - `Collection.write` now raises a `NotImplementedError` because
 11 |   https://github.com/klieret/AnkiPandas/issues/137 has so far not been
 12 |   resolved and has caused numerous problems.
 13 | 
 14 | ## 0.3.14 -- 2023-05-14
 15 | 
 16 | ### Fixed
 17 | 
 18 | - Ensured compatibility with pandas 2.0
 19 | 
 20 | ## 0.3.13 -- 2023-04-05
 21 | 
 22 | ### Fixed
 23 | 
 24 | - Require pandas < 2.0 for now because of API changes in pandas 2.0.
 25 |   See #143
 26 | 
 27 | ## 0.3.12 -- 2023-01-01
 28 | 
 29 | ### Fixed
 30 | 
 31 | - Fix future warning about `Index.__and__`
 32 | 
 33 | ## 0.3.11 -- 2022-12-29
 34 | 
 35 | ### Removed
 36 | 
 37 | - Python 3.6 has reached its end of life and is no longer supported. This is to
 38 |   ensure that we can keep the code base modern and developers happy.
 39 | 
 40 | ### Fixed
 41 | 
 42 | - When updating cards or notes, certain SQL search indices that are usually
 43 |   automatically created by Anki could be left missing. While these do not
 44 |   change any of the information in the database and can be created trivially,
 45 |   this caused Anki to refuse to load the database.
 46 | 
 47 | ### Documentation and developer happiness
 48 | 
 49 | - Various improvements to the documentation
 50 | - Work on modernizing the code base
 51 | 
 52 | ## 0.3.10 -- 2021-05-07
 53 | 
 54 | ### Fixed
 55 | 
 56 | - The SQLite connection to the database is now always closed right away by the Collection.
 57 |   This solves issues where the connection was kept open even though all collection and
 58 |   AnkiDataFrame objects were deleted (there is additional garbage collection trouble).
 59 |   Read more in [issue 75](https://github.com/klieret/AnkiPandas/issues/75)
 60 | 
 61 | ## 0.3.9 -- 2020-12-17
 62 | 
 63 | ### Deprecated
 64 | 
 65 | - Future releases will not support python 3.5 anymore. Added warning for users still on
 66 |   python 3.5.
 67 | 
 68 | ## 0.3.8 -- 2020-12-05
 69 | 
 70 | ### Fixed
 71 | 
 72 | - Setup problems with editable install as described [here](https://github.com/pypa/pip/issues/7953)
 73 | - Compatibility issues with building ankipandas on windows machines (windows is not
 74 |   using utf8 by default which often results in errors, see
 75 |   [here](https://discuss.python.org/t/pep-597-enable-utf-8-mode-by-default-on-windows/3122))
 76 | - Issues with max search depth for database search
 77 | - AttributeError when calling `merge_notes` with `inplace=True`. [Issue #51](https://github.com/klieret/AnkiPandas/issues/51)
 78 | - Default search paths might not have been working because the user name was not inserted properly
 79 | - Properly tell anki that things were changed and that it should sync its tables
 80 | 
 81 | ### Changed
 82 | 
 83 | - Improved database search on windows machines
 84 | - If no changes are detected in the different tables, the database will not be overwritten
 85 | 
 86 | ## 0.3.7 -- 2020-11-28
 87 | 
 88 | ### Fixed
 89 | 
 90 | - `merge_cards` and `merge_notes` didn't update metadata of return value, resulting in errors like
 91 |   `Unknown value of _df_format`. Issue #42
 92 | - `force` values weren't passed on, resulting in AnkiPandas refusing to do anything
 93 |   when writing out
 94 | - On Windows the int size is 32 bit even on 64 bit computers, resulting in issues with
 95 |   large values of ids. Issue #41
 96 | 
 97 | 
 98 | ## 0.3.6 - 2020-08-26
 99 | 
100 | ### Fixed
101 | 
102 | - Keep support for python 3.5
103 | 
104 | ## 0.3.5 - 2020-08-26
105 | 
106 | ### Fixed
107 | 
108 | - Support for new anki versions ([#38](https://github.com/klieret/AnkiPandas/issues/38))
109 | 
110 | ## 0.3.4 - 2020-07-09
111 | 
112 | ### Fixed
113 | 
114 | - JSONDecodeError upon loading database. Also added additional debugging output in case something like this happens again.
115 | 
116 | ## 0.3.3 - 2020-04-21
117 | 
118 | ### Fixed
119 | 
120 | - Compatibility with new pandas versions
121 | 
122 | ## 0.3.0 - 2019-06-02
123 | 
124 | ### Added
125 | 
126 | - Add new notes and cards
127 | 
128 | ### Changed
129 | 
130 | - Cards/notes/reviews tables are now initialized from a central ``Collection`` object
131 | 
132 | ### Fixed
133 | 
134 | - ``was_modified``, ``was_added`` break when user added additional columns to dataframe
135 | - Correctly set ``mod`` (modification timestamp) and ``usn`` (update sequence number) of whole database after updates
136 | 
137 | ## 0.2.1 - 2019-05-17
138 | 
139 | ### Fixed
140 | 
141 | - Merging of tables failed with some pandas versions
142 | 
143 | ## 0.2.0 - 2019-05-07
144 | 
145 | ### Added
146 | 
147 | - Modify tables and write them back into the database.
148 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2019 Kilian Lieret
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include ankipandas/version.txt
2 | include requirements.txt
3 | include CHANGELOG.md
4 | include README.md
5 | include LICENSE.txt
6 | recursive-include ankipandas/test/data/few_basic_cards *.anki2
7 | recursive-include ankipandas/data *.csv
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 | <a href="https://ankipandas.readthedocs.io/">
  3 | <img src="https://raw.githubusercontent.com/klieret/AnkiPandas/master/misc/logo/logo_github.png"></a>
  4 | <p align="center">
  5 | <em>Analyze and manipulate your Anki collection using pandas!</em>
  6 | </p>
  7 | </a>
  8 | <p>
  9 | 
 10 | [![Documentation Status](https://readthedocs.org/projects/ankipandas/badge/?version=latest)](https://ankipandas.readthedocs.io/) [![Gitter](https://img.shields.io/gitter/room/ankipandas/community.svg)](https://matrix.to/#/#AnkiPandas_community:gitter.im)
 11 | [![License](https://img.shields.io/github/license/klieret/ankipandas.svg)](https://github.com/klieret/ankipandas/blob/master/LICENSE.txt)
 12 | [![PR welcome](https://img.shields.io/badge/PR-Welcome-%23FF8300.svg)](https://git-scm.com/book/en/v2/GitHub-Contributing-to-a-Project)
 13 | 
 14 | </p>
 15 | <p>
 16 | 
 17 | [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/klieret/AnkiPandas/master.svg)](https://results.pre-commit.ci/latest/github/klieret/AnkiPandas/master)
 18 | [![gh actions](https://github.com/klieret/AnkiPandas/workflows/testing/badge.svg)](https://github.com/klieret/AnkiPandas/actions) [![Coveralls](https://coveralls.io/repos/github/klieret/AnkiPandas/badge.svg?branch=master)](https://coveralls.io/github/klieret/AnkiPandas?branch=master)
 19 | [![CodeQL](https://github.com/klieret/AnkiPandas/actions/workflows/codeql.yml/badge.svg)](https://github.com/klieret/AnkiPandas/actions/workflows/codeql.yml)
 20 | [![gitmoji](https://img.shields.io/badge/gitmoji-%20😜%20😍-FFDD67.svg)](https://gitmoji.dev) [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black)
 21 | [![Pypi status](https://badge.fury.io/py/ankipandas.svg)](https://pypi.org/project/ankipandas/)
 22 | 
 23 | </p>
 24 | </div>
 25 | 
 26 | ## 📝 Description
 27 | 
 28 | > **Note**
 29 | > This package needs a new maintainer, as I currently do not have enough time to continue development
 30 | > of this package. Writing modifications back into the Anki database is currently disabled,
 31 | > in particular because of issue [#137](https://github.com/klieret/AnkiPandas/issues/137).
 32 | > Please reach out to me if you are interested in getting involved!
 33 | 
 34 | [Anki](https://apps.ankiweb.net/) is one of the most popular flashcard
 35 | system for spaced repetition learning,
 36 | [pandas](https://pandas.pydata.org/) is the most popular python package
 37 | for data analysis and manipulation. So what could be better than to
 38 | bring both together?
 39 | 
 40 | With `AnkiPandas` you can use `pandas` to easily analyze or manipulate
 41 | your Anki flashcards.
 42 | 
 43 | **Features**:
 44 | 
 45 | -   **Select**: Easily select arbitrary subsets of your cards, notes or
 46 |     reviews using `pandas` ([one of many
 47 |     introductions](https://medium.com/dunder-data/6fcd0170be9c),
 48 |     [official
 49 |     documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html))
 50 | -   **Visualize**: Use pandas\' powerful [built in
 51 |     tools](https://pandas.pydata.org/pandas-docs/stable/user_guide/visualization.html)
 52 |     or switch to the even more versatile
 53 |     [seaborn](https://seaborn.pydata.org/) (statistical analysis) or
 54 |     [matplotlib](https://matplotlib.org/) libraries
 55 | -   **Manipulate**: Apply fast bulk operations to the table (e.g. add
 56 |     tags, change decks, set field contents, suspend cards, \...) or
 57 |     iterate over the table and perform these manipulations step by step.
 58 |     **⚠️ This functionality is currently disabled until [#137](https://github.com/klieret/AnkiPandas/issues/137) has been resolved! ⚠️**
 59 | -   **Import and Export**: Pandas can export to (and import from) csv,
 60 |     MS Excel, HTML, JSON, \... ([io
 61 |     documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html))
 62 | 
 63 | **Pros**:
 64 | 
 65 | -   **Easy installation**: Install via python package manager
 66 |     (independent of your Anki installation)
 67 | -   **Simple**: Just one line of code to get started
 68 | -   **Convenient**: Bring together information about
 69 |     [cards](https://apps.ankiweb.net/docs/manual.html#cards),
 70 |     [notes](https://apps.ankiweb.net/docs/manual.html#notes-&-fields),
 71 |     [models](https://apps.ankiweb.net/docs/manual.html#note-types),
 72 |     [decks](https://apps.ankiweb.net/docs/manual.html#decks) and more in
 73 |     just one table!
 74 | -   **Fully documented**: Documentation on [readthedocs](https://ankipandas.readthedocs.io/)
 75 | -   **Well tested**: More than 100 unit tests to keep everything in
 76 |     check
 77 | 
 78 | Alternatives: If your main goal is to add new cards, models and more,
 79 | you can also take a look at the
 80 | [genanki](https://github.com/kerrickstaley/genanki) project.
 81 | 
 82 | ## 📦 Installation
 83 | 
 84 | `AnkiPandas` is available as [pypi
 85 | package](https://pypi.org/project/ankipandas/) and can be installed or
 86 | upgrade with the [python package
 87 | manager](https://pip.pypa.io/en/stable/):
 88 | 
 89 | ```sh
 90 | pip3 install --user --upgrade ankipandas
 91 | ```
 92 | 
 93 | ### Development installation
 94 | 
 95 | For the latest development version you can also work from a cloned
 96 | version of this repository:
 97 | 
 98 | ```sh
 99 | git clone https://github.com/klieret/ankipandas/
100 | cd ankipandas
101 | pip3 install --user --upgrade --editable .
102 | ```
103 | 
104 | If you want to help develop this package further, please also install the
105 | [pre-commit](https://pre-commit.ci/) hooks and use [gitmoji](https://gitmoji.dev/):
106 | 
107 | ```sh
108 | pre-commit install
109 | gitmoji -i
110 | ```
111 | 
112 | ## 🔥 Let's get started!
113 | 
114 | Starting up is as easy as this:
115 | 
116 | ```python
117 | from ankipandas import Collection
118 | 
119 | col = Collection()
120 | ```
121 | 
122 | And `col.notes` will be dataframe containing all notes, with additional
123 | methods that make many things easy. Similarly, you can access cards or
124 | reviews using `col.cards` or `col.revs`.
125 | 
126 | If called without any argument `Collection()` tries to find your Anki
127 | database by itself. However this might take some time. To make it
128 | easier, simply supply (part of) the path to the database and (if you
129 | have more than one user) your Anki user name, e.g.
130 | `Collection(".local/share/Anki2/", user="User 1")` on many Linux
131 | installations.
132 | 
133 | To get information about the interpretation of each column, use
134 | `print(col.notes.help_cols())`.
135 | 
136 | Take a look at the [documentation](https://ankipandas.readthedocs.io/)
137 | to find out more about more about the available methods!
138 | 
139 | Some basic examples:
140 | 
141 | ## 📈 Analysis
142 | 
143 | **More examples**: [Analysis
144 | documentation](https://ankipandas.readthedocs.io/en/latest/examples.html),
145 | [projects that use `AnkiPandas`](https://ankipandas.readthedocs.io/en/latest/projects_with_ap.html).
146 | 
147 | Show a histogram of the number of reviews (repetitions) of each card for
148 | all decks:
149 | 
150 | ```python
151 | col.cards.hist(column="creps", by="cdeck")
152 | ```
153 | 
154 | Show the number of leeches per deck as pie chart:
155 | 
156 | ```python
157 | cards = col.cards.merge_notes()
158 | selection = cards[cards.has_tag("leech")]
159 | selection["cdeck"].value_counts().plot.pie()
160 | ```
161 | 
162 | Find all notes of model `MnemoticModel` with empty `Mnemotic` field:
163 | 
164 | ```python
165 | notes = col.notes.fields_as_columns()
166 | notes.query("model=='MnemoticModel' and 'Mnemotic'==''")
167 | ```
168 | 
169 | ## 🛠️ Manipulations
170 | 
171 | > **Warning**
172 | > Writing the database has currently been disabled until
173 | > [#137](https://github.com/klieret/AnkiPandas/issues/137) has been resolved.
174 | > Help is much appreciated!
175 | 
176 | > **Warning**
177 | > **Please be careful and test this well!**
178 | > Ankipandas will create a backup of your database before writing, so you can always restore the previous state. Please make sure that everything is working before continuing to use Anki normally!
179 | 
180 | Add the `difficult-japanese` and `marked` tag to all notes that contain
181 | the tags `Japanese` and `leech`:
182 | 
183 | ```python
184 | notes = col.notes
185 | selection = notes[notes.has_tags(["Japanese", "leech"])]
186 | selection = selection.add_tag(["difficult-japanese", "marked"])
187 | col.notes.update(selection)
188 | col.write(modify=True)  # Overwrites your database after creating a backup!
189 | ```
190 | 
191 | Set the `language` field to `English` for all notes of model
192 | `LanguageModel` that are tagged with `English`:
193 | 
194 | ```python
195 | notes = col.notes
196 | selection = notes[notes.has_tag(["English"])].query("model=='LanguageModel'").copy()
197 | selection.fields_as_columns(inplace=True)
198 | selection["language"] = "English"
199 | col.notes.update(selection)
200 | col.write(modify=True)
201 | ```
202 | 
203 | Move all cards tagged `leech` to the deck `Leeches Only`:
204 | 
205 | ```python
206 | cards = col.cards
207 | selection = cards[cards.has_tag("leech")]
208 | selection["cdeck"] = "Leeches Only"
209 | col.cards.update(selection)
210 | col.write(modify=True)
211 | ```
212 | 
213 | ## 🐞 Troubleshooting
214 | 
215 | See the [troubleshooting section in the
216 | documentation](https://ankipandas.readthedocs.io/en/latest/troubleshooting.html).
217 | 
218 | ## 💖 Contributing
219 | 
220 | Your help is greatly appreciated! Suggestions, bug reports and feature
221 | requests are best opened as [github
222 | issues](https://github.com/klieret/ankipandas/issues). You could also
223 | first discuss in the [gitter
224 | community](https://matrix.to/#/#AnkiPandas_community:gitter.im). If you want to code
225 | something yourself, you are very welcome to submit a [pull
226 | request](https://github.com/klieret/AnkiPandas/pulls)!
227 | 
228 | Bug reports and pull requests are credited with the help of the [allcontributors bot](https://allcontributors.org/).
229 | 
230 | ## 📃 License & Disclaimer
231 | 
232 | This software is licenced under the [MIT
233 | license](https://github.com/klieret/ankipandas/blob/master/LICENSE.txt)
234 | and (despite best testing efforts) comes **without any warranty**. The
235 | logo is inspired by the [Anki
236 | logo](https://github.com/ankitects/anki/blob/main/qt/aqt/data/web/imgs/anki-logo-thin.png)
237 | ([license](https://github.com/ankitects/anki/blob/main/LICENSE)) and
238 | the logo of the pandas package
239 | ([license2](https://github.com/pandas-dev/pandas/blob/main/LICENSE)).
240 | This library and its author(s) are not affiliated/associated with the
241 | main Anki or pandas project in any way.
242 | 
243 | ## ✨ Contributors
244 | 
245 | Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
246 | 
247 | <!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
248 | <!-- prettier-ignore-start -->
249 | <!-- markdownlint-disable -->
250 | <table>
251 |   <tbody>
252 |     <tr>
253 |       <td align="center"><a href="https://github.com/Blocked"><img src="https://avatars.githubusercontent.com/u/4366503?v=4?s=100" width="100px;" alt="Blocked"/><br /><sub><b>Blocked</b></sub></a><br /><a href="https://github.com/klieret/AnkiPandas/issues?q=author%3ABlocked" title="Bug reports">🐛</a></td>
254 |       <td align="center"><a href="https://github.com/CalculusAce"><img src="https://avatars3.githubusercontent.com/u/42630988?v=4?s=100" width="100px;" alt="CalculusAce"/><br /><sub><b>CalculusAce</b></sub></a><br /><a href="https://github.com/klieret/AnkiPandas/issues?q=author%3ACalculusAce" title="Bug reports">🐛</a></td>
255 |       <td align="center"><a href="http://frnsys.com"><img src="https://avatars.githubusercontent.com/u/1059947?v=4?s=100" width="100px;" alt="Francis Tseng"/><br /><sub><b>Francis Tseng</b></sub></a><br /><a href="https://github.com/klieret/AnkiPandas/issues?q=author%3Afrnsys" title="Bug reports">🐛</a> <a href="https://github.com/klieret/AnkiPandas/commits?author=frnsys" title="Code">💻</a></td>
256 |       <td align="center"><a href="https://github.com/khughitt"><img src="https://avatars.githubusercontent.com/u/125001?v=4?s=100" width="100px;" alt="Keith Hughitt"/><br /><sub><b>Keith Hughitt</b></sub></a><br /><a href="https://github.com/klieret/AnkiPandas/issues?q=author%3Akhughitt" title="Bug reports">🐛</a></td>
257 |       <td align="center"><a href="https://github.com/eumiro"><img src="https://avatars0.githubusercontent.com/u/6774676?v=4?s=100" width="100px;" alt="Miroslav Šedivý"/><br /><sub><b>Miroslav Šedivý</b></sub></a><br /><a href="https://github.com/klieret/AnkiPandas/commits?author=eumiro" title="Tests">⚠️</a> <a href="https://github.com/klieret/AnkiPandas/commits?author=eumiro" title="Code">💻</a></td>
258 |       <td align="center"><a href="https://github.com/bollwyvl"><img src="https://avatars.githubusercontent.com/u/45380?v=4?s=100" width="100px;" alt="Nicholas Bollweg"/><br /><sub><b>Nicholas Bollweg</b></sub></a><br /><a href="https://github.com/klieret/AnkiPandas/commits?author=bollwyvl" title="Code">💻</a></td>
259 |       <td align="center"><a href="http://thomasbrownback.com/"><img src="https://avatars2.githubusercontent.com/u/26754?v=4?s=100" width="100px;" alt="Thomas Brownback"/><br /><sub><b>Thomas Brownback</b></sub></a><br /><a href="https://github.com/klieret/AnkiPandas/issues?q=author%3Abrownbat" title="Bug reports">🐛</a></td>
260 |     </tr>
261 |     <tr>
262 |       <td align="center"><a href="http://esrh.sdf.org"><img src="https://avatars.githubusercontent.com/u/16175276?v=4?s=100" width="100px;" alt="eshrh"/><br /><sub><b>eshrh</b></sub></a><br /><a href="https://github.com/klieret/AnkiPandas/commits?author=eshrh" title="Documentation">📖</a></td>
263 |       <td align="center"><a href="https://github.com/exc4l"><img src="https://avatars3.githubusercontent.com/u/74188442?v=4?s=100" width="100px;" alt="exc4l"/><br /><sub><b>exc4l</b></sub></a><br /><a href="https://github.com/klieret/AnkiPandas/issues?q=author%3Aexc4l" title="Bug reports">🐛</a> <a href="https://github.com/klieret/AnkiPandas/commits?author=exc4l" title="Code">💻</a></td>
264 |       <td align="center"><a href="https://github.com/p4nix"><img src="https://avatars1.githubusercontent.com/u/7038116?v=4?s=100" width="100px;" alt="p4nix"/><br /><sub><b>p4nix</b></sub></a><br /><a href="https://github.com/klieret/AnkiPandas/issues?q=author%3Ap4nix" title="Bug reports">🐛</a></td>
265 |     </tr>
266 |   </tbody>
267 | </table>
268 | 
269 | <!-- markdownlint-restore -->
270 | <!-- prettier-ignore-end -->
271 | 
272 | <!-- ALL-CONTRIBUTORS-LIST:END -->
273 | 
274 | This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
275 | 


--------------------------------------------------------------------------------
/ankipandas/__init__.py:
--------------------------------------------------------------------------------
 1 | # ours
 2 | from __future__ import annotations
 3 | 
 4 | import ankipandas.raw
 5 | import ankipandas.util
 6 | from ankipandas.ankidf import AnkiDataFrame
 7 | from ankipandas.collection import Collection
 8 | from ankipandas.paths import db_path_input, find_db
 9 | from ankipandas.util.log import log, set_debug_log_level, set_log_level
10 | 


--------------------------------------------------------------------------------
/ankipandas/_columns.py:
--------------------------------------------------------------------------------
  1 | # std
  2 | from __future__ import annotations
  3 | 
  4 | import copy
  5 | from pathlib import Path
  6 | from typing import Any
  7 | 
  8 | import numpy as np
  9 | 
 10 | # 3rd
 11 | import pandas as pd
 12 | 
 13 | # ours
 14 | from ankipandas.util.misc import invert_dict
 15 | 
 16 | # todo: Docstrings, cleanup
 17 | 
 18 | 
 19 | tables_ours2anki = {"revs": "revlog", "cards": "cards", "notes": "notes"}
 20 | tables_anki2ours = invert_dict(tables_ours2anki)
 21 | 
 22 | fields_file = Path(__file__).parent / "data" / "anki_fields.csv"
 23 | fields_df = pd.read_csv(fields_file)
 24 | 
 25 | #: Maps table type to name of the index. E.g. the index of the notes is called
 26 | #: nid.
 27 | table2index = {"cards": "cid", "notes": "nid", "revs": "rid"}
 28 | 
 29 | our_tables = sorted(tables_ours2anki)
 30 | our_columns = {
 31 |     table: sorted(
 32 |         fields_df[(fields_df["Table"] == table) & fields_df["Default"]][
 33 |             "Column"
 34 |         ].unique()
 35 |     )
 36 |     for table in our_tables
 37 | }
 38 | # Remove indices
 39 | for table, columns in our_columns.items():
 40 |     columns.remove(table2index[table])
 41 | 
 42 | # hard code this here, because order is important
 43 | anki_columns = {
 44 |     "cards": [
 45 |         "id",
 46 |         "nid",
 47 |         "did",
 48 |         "ord",
 49 |         "mod",
 50 |         "usn",
 51 |         "type",
 52 |         "queue",
 53 |         "due",
 54 |         "ivl",
 55 |         "factor",
 56 |         "reps",
 57 |         "lapses",
 58 |         "left",
 59 |         "odue",
 60 |         "odid",
 61 |         "flags",
 62 |         "data",
 63 |     ],
 64 |     "notes": [
 65 |         "id",
 66 |         "guid",
 67 |         "mid",
 68 |         "mod",
 69 |         "usn",
 70 |         "tags",
 71 |         "flds",
 72 |         "sfld",
 73 |         "csum",
 74 |         "flags",
 75 |         "data",
 76 |     ],
 77 |     "revs": [
 78 |         "id",
 79 |         "cid",
 80 |         "usn",
 81 |         "ease",
 82 |         "ivl",
 83 |         "lastIvl",
 84 |         "factor",
 85 |         "time",
 86 |         "type",
 87 |     ],
 88 | }
 89 | 
 90 | columns_ours2anki = {
 91 |     table: dict(
 92 |         zip(
 93 |             fields_df[(fields_df["Table"] == table) & fields_df["Native"]][
 94 |                 "Column"
 95 |             ],
 96 |             fields_df[(fields_df["Table"] == table) & fields_df["Native"]][
 97 |                 "AnkiColumn"
 98 |             ],
 99 |         )
100 |     )
101 |     for table in our_tables
102 | }
103 | 
104 | 
105 | columns_anki2ours = {
106 |     table: invert_dict(columns_ours2anki[table]) for table in our_tables
107 | }
108 | 
109 | value_maps = {
110 |     "cards": {
111 |         "cqueue": {
112 |             -3: "sched buried",
113 |             -2: "user buried",
114 |             -1: "suspended",
115 |             0: "new",
116 |             1: "learning",
117 |             2: "due",
118 |             3: "in learning",
119 |         },
120 |         "ctype": {0: "learning", 1: "review", 2: "relearn", 3: "cram"},
121 |     },
122 |     "revs": {"rtype": {0: "learning", 1: "review", 2: "relearn", 3: "cram"}},
123 | }
124 | 
125 | dtype_casts: dict[str, dict[str, Any]] = {
126 |     "notes": {},
127 |     "cards": {},
128 |     "revs": {},
129 | }
130 | 
131 | # todo: more precise?
132 | dtype_casts_back: dict[str, dict[str, Any]] = {
133 |     "notes": {},
134 |     "cards": {},
135 |     "revs": {},
136 | }
137 | 
138 | # Avoiding problem with ints to floats such as here:
139 | # https://github.com/pandas-dev/pandas/issues/4094
140 | # Also be careful with platform dependent length of the int type, else this
141 | # causes this error https://stackoverflow.com/questions/38314118/
142 | # on Windows machines as stated by CalculusAce in
143 | # https://github.com/klieret/AnkiPandas/issues/41
144 | dtype_casts2 = {
145 |     "cards": {
146 |         "cord": np.int64,
147 |         "cmod": np.int64,
148 |         "cusn": np.int64,
149 |         "cdue": np.int64,
150 |         "civl": np.int64,
151 |         "cfactor": np.int64,
152 |         "creps": np.int64,
153 |         "clapses": np.int64,
154 |         "cleft": np.int64,
155 |         "codue": np.int64,
156 |     },
157 |     "notes": {"nmod": np.int64, "nusn": np.int64},
158 |     "revs": {
159 |         "cid": np.int64,
160 |         "rusn": np.int64,
161 |         "rease": np.int64,
162 |         "ivl": np.int64,
163 |         "lastivl": np.int64,
164 |         "rfactor": np.int64,
165 |         "rtime": np.int64,
166 |     },
167 | }
168 | dtype_casts_all = copy.deepcopy(dtype_casts2["cards"])
169 | dtype_casts_all.update(dtype_casts2["notes"])
170 | dtype_casts_all.update(dtype_casts2["revs"])
171 | 


--------------------------------------------------------------------------------
/ankipandas/collection.py:
--------------------------------------------------------------------------------
  1 | # std
  2 | from __future__ import annotations
  3 | 
  4 | import sqlite3
  5 | import time
  6 | from contextlib import closing
  7 | from pathlib import Path, PurePath
  8 | from typing import Any
  9 | 
 10 | # ours
 11 | import ankipandas.paths
 12 | import ankipandas.raw as raw
 13 | from ankipandas.ankidf import AnkiDataFrame
 14 | from ankipandas.util.log import log
 15 | 
 16 | 
 17 | class Collection:
 18 |     def __init__(self, path=None, user=None):
 19 |         """Initialize :class:`~ankipandas.collection.Collection` object.
 20 | 
 21 |         Args:
 22 |             path: (Search) path to database. See
 23 |                 :py:func:`~ankipandas.paths.db_path_input` for more
 24 |                 information.
 25 |             user: Anki user name. See
 26 |                 :py:func:`~ankipandas.paths.db_path_input` for more
 27 |                 information.
 28 | 
 29 |         Examples:
 30 | 
 31 |         .. code-block:: python
 32 | 
 33 |             from ankipandas import Collection
 34 | 
 35 |             # Let ankipandas find the db for you
 36 |             col = Collection()
 37 | 
 38 |             # Let ankipandas find the db for this user (important if you have
 39 |             # more than one user account in Anki)
 40 |             col = Collection(user="User 1")
 41 | 
 42 |             # Specify full path to Anki's database
 43 |             col = Collection("/full/path/to/collection.anki2")
 44 | 
 45 |             # Specify partial path to Anki's database and specify user
 46 |             col = Collection("/partial/path/to/collection", user="User 1")
 47 | 
 48 |         """
 49 |         path = ankipandas.paths.db_path_input(path, user=user)
 50 | 
 51 |         #: Path to currently loaded database
 52 |         self._path: Path = path
 53 | 
 54 |         #: Should be accessed with _get_item!
 55 |         self.__items: dict[str, AnkiDataFrame | None] = {
 56 |             "notes": None,
 57 |             "cards": None,
 58 |             "revs": None,
 59 |         }
 60 | 
 61 |         #: Should be accessed with _get_original_item!
 62 |         self.__original_items: dict[str, AnkiDataFrame | None] = {
 63 |             "notes": None,
 64 |             "cards": None,
 65 |             "revs": None,
 66 |         }
 67 | 
 68 |     @property
 69 |     def path(self) -> Path:
 70 |         """Path to currently loaded database"""
 71 |         return self._path
 72 | 
 73 |     @property
 74 |     def db(self) -> sqlite3.Connection:
 75 |         """Opened Anki database. Make sure to call `db.close()` after you're
 76 |         done. Better still, use `contextlib.closing`.
 77 |         """
 78 |         log.debug(f"Opening Db from {self._path}")
 79 |         return raw.load_db(self._path)
 80 | 
 81 |     def _get_original_item(self, item):
 82 |         r = self.__original_items[item]
 83 |         if r is None:
 84 |             if item in ["notes", "revs", "cards"]:
 85 |                 r = AnkiDataFrame.init_with_table(self, item)
 86 |                 self.__original_items[item] = r
 87 |         return r
 88 | 
 89 |     def _get_item(self, item):
 90 |         r = self.__items[item]
 91 |         if r is None:
 92 |             r = self._get_original_item(item).copy(True)
 93 |             self.__items[item] = r
 94 |         return r
 95 | 
 96 |     @property
 97 |     def notes(self) -> AnkiDataFrame:
 98 |         """Notes as :class:`ankipandas.ankidf.AnkiDataFrame`."""
 99 |         return self._get_item("notes")
100 | 
101 |     @notes.setter
102 |     def notes(self, value):
103 |         self.__items["notes"] = value
104 | 
105 |     @property
106 |     def cards(self) -> AnkiDataFrame:
107 |         """Cards as :class:`ankipandas.ankidf.AnkiDataFrame`."""
108 |         return self._get_item("cards")
109 | 
110 |     @cards.setter
111 |     def cards(self, value):
112 |         self.__items["cards"] = value
113 | 
114 |     @property
115 |     def revs(self) -> AnkiDataFrame:
116 |         """Reviews as :class:`ankipandas.ankidf.AnkiDataFrame`."""
117 |         return self._get_item("revs")
118 | 
119 |     @revs.setter
120 |     def revs(self, value):
121 |         self.__items["revs"] = value
122 | 
123 |     def empty_notes(self):
124 |         """Similar :class:`ankipandas.ankidf.AnkiDataFrame`
125 |         to :attr:`notes`, but without any rows."""
126 |         return AnkiDataFrame.init_with_table(self, "notes", empty=True)
127 | 
128 |     def empty_cards(self):
129 |         """Similar :class:`ankipandas.ankidf.AnkiDataFrame`
130 |         to :attr:`cards`, but without any rows."""
131 |         return AnkiDataFrame.init_with_table(self, "cards", empty=True)
132 | 
133 |     def empty_revs(self):
134 |         """Similar :class:`ankipandas.ankidf.AnkiDataFrame`
135 |         to :attr:`revs`, but without any rows."""
136 |         return AnkiDataFrame.init_with_table(self, "revs", empty=True)
137 | 
138 |     def summarize_changes(self, output="print") -> dict[str, dict] | None:
139 |         """Summarize changes that were made with respect to the table
140 |         as loaded from the database.
141 |         If notes/cards/etc. were not loaded at all (and hence also definitely
142 |         not modified), they do not appear in the output.
143 | 
144 |         Args:
145 |             output: Output mode: 'print' (default: print)
146 |                 or 'dict' (return as dictionary of dictionaries of format
147 |                 ``{<type (cards/notes/...)>: {<key>: <value>}}``.
148 | 
149 |         Returns:
150 |             None or dictionary of dictionaries
151 |         """
152 |         if output == "dict":
153 |             as_dict = {}
154 |             for key, value in self.__items.items():
155 |                 if value is not None:
156 |                     changes = value.summarize_changes(output="dict")
157 |                     as_dict[key] = changes  # type: ignore
158 |             return as_dict  # type: ignore
159 |         elif output == "print":
160 |             for key, value in self.__items.items():
161 |                 if value is not None:
162 |                     print(f"======== {key} ========")
163 |                     value.summarize_changes()
164 |             return None  # explicit for mypy
165 |         else:
166 |             raise ValueError(f"Invalid output setting: {output}")
167 | 
168 |     def _prepare_write_data(
169 |         self, modify=False, add=False, delete=False
170 |     ) -> dict[str, Any]:
171 |         prepared = {}
172 |         for key, value in self.__items.items():
173 |             if value is None:
174 |                 log.debug("Write: Skipping %s, because it's None.", key)
175 |                 continue
176 |             if key in ["notes", "cards", "revs"]:
177 |                 ndeleted = len(value.was_deleted())
178 |                 nmodified = sum(value.was_modified(na=False))
179 |                 nadded = sum(value.was_added())
180 | 
181 |                 if not delete and ndeleted:
182 |                     raise ValueError(
183 |                         "You specified delete=False, but {} rows of item "
184 |                         "{} would be deleted.".format(ndeleted, key)
185 |                     )
186 |                 if not modify and nmodified:
187 |                     raise ValueError(
188 |                         "You specified modify=False, but {} rows of item "
189 |                         "{} would be modified.".format(nmodified, key)
190 |                     )
191 |                 if not add and nadded:
192 |                     raise ValueError(
193 |                         "You specified add=False, but {} rows of item "
194 |                         "{} would be modified.".format(nadded, key)
195 |                     )
196 | 
197 |                 if not ndeleted and not nmodified and not nadded:
198 |                     log.debug(
199 |                         "Skipping table %s for writing, because nothing "
200 |                         "seemed to have changed",
201 |                         key,
202 |                     )
203 |                     continue
204 | 
205 |                 mode = "replace"
206 |                 if modify and not add and not delete:
207 |                     mode = "update"
208 |                 if add and not modify and not delete:
209 |                     mode = "append"
210 |                 log.debug("Will update table %s with mode %s", key, mode)
211 |                 value.check_table_integrity()
212 |                 raw_table = value.raw()
213 |                 prepared[key] = {"raw": raw_table, "mode": mode}
214 | 
215 |         return prepared
216 | 
217 |     def _get_and_update_info(self) -> dict[str, Any]:
218 |         with closing(self.db) as db:
219 |             info = raw.get_info(db)
220 | 
221 |             info_updates = dict(
222 |                 mod=int(time.time() * 1000),  # Modification time stamp
223 |                 usn=-1,  # Signals update needed
224 |             )
225 |             if raw.get_db_version(db) == 0:
226 |                 for key in info_updates:
227 |                     assert key in info
228 |                 info.update(info_updates)
229 |             elif raw.get_db_version(db) == 1:
230 |                 assert len(info) == 1
231 |                 first_key = list(info)[0]
232 |                 info[first_key].update(info_updates)
233 |             # fixme: this currently doesn't work. In the new db structure there's
234 |             #   a tags table instead of a field, but it doesn't seem to be
235 |             #   used.
236 |             # if self.__items["notes"] is not None:
237 |             #
238 |             #     missing_tags = list(
239 |             #         set(info["tags"].keys())
240 |             #         - set(self.__items["notes"].list_tags())
241 |             #     )
242 |             #     for tag in missing_tags:
243 |             #         # I'm assuming that this is the usn (update sequence number)
244 |             #         # of the tags
245 |             #         info["tags"][tag] = -1
246 |         return info
247 | 
248 |     def write(
249 |         self,
250 |         modify=False,
251 |         add=False,
252 |         delete=False,
253 |         backup_folder: PurePath | str | None = None,
254 |         _override_exception=False,
255 |     ):
256 |         """Creates a backup of the database and then writes back the new
257 |         data.
258 | 
259 |         .. danger::
260 | 
261 |             The write capabilities of ``AnkiPandas`` have currently been disabled
262 |             because of `#137 <https://github.com/klieret/AnkiPandas/issues/137/>`_.
263 |             Help in fixing this issue would be greatly appreciated!
264 | 
265 |         .. danger::
266 | 
267 |             The switches ``modify``, ``add`` and ``delete`` will run additional
268 |             cross-checks, but do not rely on them to 100%!
269 | 
270 |         .. warning::
271 | 
272 |             It is recommended to run :meth:`summarize_changes` before to check
273 |             whether the changes match your expectation.
274 | 
275 |         .. note::
276 | 
277 |             Please make sure to thoroughly check your collection in Anki after
278 |             every write process!
279 | 
280 |         Args:
281 |             modify: Allow modification of existing items (notes, cards, etc.)
282 |             add: Allow adding of new items (notes, cards, etc.)
283 |             delete: Allow deletion of items (notes, cards, etc.)
284 |             backup_folder: Path to backup folder. If None is given, the backup
285 |                 is created in the Anki backup directory (if found).
286 | 
287 |         Returns:
288 |             None
289 |         """
290 |         if not _override_exception:
291 |             raise NotImplementedError(
292 |                 "The write capabilities of AnkiPandas have currently been disabled"
293 |                 " because of https://github.com/klieret/AnkiPandas/issues/137/. "
294 |                 "Help in fixing this issue would be greatly appreciated!"
295 |             )
296 | 
297 |         if not modify and not add and not delete:
298 |             log.warning(
299 |                 "Please set modify=True, add=True or delete=True, you're"
300 |                 " literally not allowing me any modification at all."
301 |             )
302 |             return None
303 | 
304 |         try:
305 |             prepared = self._prepare_write_data(
306 |                 modify=modify, add=add, delete=delete
307 |             )
308 |             log.debug("Now getting & updating info.")
309 |             self._get_and_update_info()
310 |         except Exception as e:
311 |             log.critical(
312 |                 "Something went wrong preparing the data for writing. "
313 |                 "However, no data has been written out, so your "
314 |                 "database is save!"
315 |             )
316 |             raise e
317 |         else:
318 |             log.debug("Successfully prepared data for writing.")
319 | 
320 |         if prepared == {}:
321 |             log.warning(
322 |                 "Nothing seems to have been changed. Will not do anything!"
323 |             )
324 |             return None
325 | 
326 |         backup_path = ankipandas.paths.backup_db(
327 |             self.path, backup_folder=backup_folder
328 |         )
329 |         log.info("Backup created at %s", backup_path.resolve())
330 |         log.warning(
331 |             "Currently AnkiPandas might not be able to tell Anki to"
332 |             " sync its database. "
333 |             "You might have to manually tell Anki to sync everything "
334 |             "to AnkiDroid.\n"
335 |             "Furthermore, if you run into issues with tag searches not working"
336 |             "anymore, please first do Notes > Clear unused notes and then "
337 |             "Tools > Check Database (from the main menu). This should get them"
338 |             " to work (sorry about this issue)."
339 |         )
340 | 
341 |         # Actually setting values here, after all conversion tasks have been
342 |         # carried out. That way if any of them fails, we don't have a
343 |         # partially written collection.
344 |         log.debug("Now actually writing to database.")
345 |         try:
346 |             for table, values in prepared.items():
347 |                 log.debug("Now setting table %s.", table)
348 |                 with closing(self.db) as db:
349 |                     raw.set_table(
350 |                         db, values["raw"], table=table, mode=values["mode"]
351 |                     )
352 |                     log.debug("Setting table %s successful.", table)
353 | 
354 |                     if table == "cards":
355 |                         raw.update_card_indices(db)
356 | 
357 |                     elif table == "notes":
358 |                         raw.update_note_indices(db)
359 | 
360 |             # log.debug("Now setting info")
361 |             # raw.set_info(self.db, info)
362 |             # log.debug("Setting info successful.")
363 |         except Exception as e:
364 |             log.critical(
365 |                 "Error while writing data to database at %s. "
366 |                 "This means that your database might have become corrupted. "
367 |                 "It's STRONGLY advised that you manually restore the database "
368 |                 "by replacing it with the backup from %s and restart"
369 |                 " from scratch. "
370 |                 "Please also open a bug report at "
371 |                 "https://github.com/klieret/AnkiPandas/issues/, as errors "
372 |                 "during the actual writing process should never occur!",
373 |                 self.path.resolve(),
374 |                 backup_path.resolve(),
375 |             )
376 |             raise e
377 |         # This is important, because we have written to the database but still
378 |         # have it opened, which can cause DatabaseErrors.
379 |         log.debug("I will now drop all copies of the tables")
380 |         for key in self.__items:
381 |             self.__items[key] = None
382 |         for key in self.__original_items:
383 |             self.__original_items[key] = None
384 |         log.debug("I will now reload the connection.")
385 |         self._db = raw.load_db(self.path)
386 |         log.info(
387 |             "In case you're running this from a Jupyter notebook, make "
388 |             "sure to shutdown the kernel or delete all ankipandas objects"
389 |             " before you open anki to take a look at the result (only one "
390 |             "application can use the database at a time)."
391 |         )
392 | 


--------------------------------------------------------------------------------
/ankipandas/conftest.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | from ankipandas.util.log import set_debug_log_level
4 | 
5 | set_debug_log_level()
6 | 


--------------------------------------------------------------------------------
/ankipandas/data/anki_fields.csv:
--------------------------------------------------------------------------------
 1 | Column,AnkiColumn,Table,Description,Native,Default
 2 | cdata,data,cards,Currently unused,TRUE,FALSE
 3 | cdeck,,cards,Name of the current deck,FALSE,TRUE
 4 | cdue,due,cards,"Due is used differently for different card types: new: note id or random int, due: integer day, relative to the collection's creation time, learning: integer timestamp.",TRUE,TRUE
 5 | cfactor,factor,cards,"The new ease factor of the card in permille (parts per thousand). If the ease factor is 2500, the card’s interval will be multiplied by 2.5 the next time you press Good.",TRUE,TRUE
 6 | cflags,flags,cards,Currently unused,TRUE,FALSE
 7 | cid,cid,revs,ID of the card (this corresponds to a epoch millisecond time stamp from the creating time),TRUE,TRUE
 8 | cid,id,cards,ID of the card (this corresponds to a epoch millisecond time stamp from the creating time),TRUE,TRUE
 9 | civl,ivl,cards,"The new interval that the card was pushed to after the review. Positive values are in days, negative values are in seconds (for learning cards).",TRUE,TRUE
10 | clapses,lapses,cards,"The number of times the card went from a ""was answered correctly"" to ""was answered incorrectly"" state",TRUE,TRUE
11 | cleft,left,cards,"Of the form ``a*1000+b``, with: ``b`` the number of reps left till graduation and ``a`` the number of reps left today",TRUE,TRUE
12 | cmod,mod,cards,Modification time [epoch seconds],TRUE,TRUE
13 | codeck,,cards,Original deck: only used when the card is currently in filtered deck,FALSE,TRUE
14 | codid,odid,cards,Original did: only used when the card is currently in filtered deck,TRUE,FALSE
15 | codue,odue,cards,Original due: only used when the card is currently in filtered deck,TRUE,TRUE
16 | cord,ord,cards,Identifies which of the card templates the card corresponds to. Valid values are from 0 to num templates -1,TRUE,TRUE
17 | cqueue,queue,cards,"Can take the value ‘sched buried’, ‘user buried’, ‘suspended’, ‘new’, ‘learning’, ‘due’, ‘in learning’ (learning but next rev at least a day after the previous review). In the raw Anki database, these values are encoded as -3=sched buried, -2=user buried, -1=suspended, 0=new, 1=learning, 2=due (as for type), 3=in learning.",TRUE,TRUE
18 | creps,reps,cards,Number of reviews,TRUE,TRUE
19 | ctype,type,cards,"Can take the values ‘learning’, ‘review’, ‘relearn’, ‘cram’ (cards being studied in a filtered deck when they are not due). In the raw Anki database, these values are encoded as 0=learning, 1=review, 2=relearn, and 3=cram.",TRUE,TRUE
20 | cusn,usn,cards,This column (update sequence number) is used to keep track of the sync state of reviews and provides no useful information for analysis.Value of -1 indicates changes that need to be pushed to server. usn < server usn indicates changes that need to be pulled from server.,TRUE,TRUE
21 | did,did,cards,ID of the deck,TRUE,FALSE
22 | mid,mid,notes,Model ID,TRUE,FALSE
23 | ncsum,csum,notes,Field checksum used for duplicate check: Integer representation of first 8 digits of sha1 hash of the first field. See source code for details.,TRUE,FALSE
24 | ndata,data,notes,Currently unused,TRUE,FALSE
25 | nflags,flags,notes,Currently unused,TRUE,FALSE
26 | nflds,flds,notes,Fields of the card as list. In the raw anki database the fields are encoded as a string joined by ‘\x1f’ characters.,TRUE,TRUE
27 | nguid,guid,notes,"Globally Unique Id, almost certainly used for syncing",TRUE,TRUE
28 | nid,id,notes,ID of the note (this corresponds to a epoch millisecond time stamp from the creating time),TRUE,TRUE
29 | nid,nid,cards,ID of the note (this corresponds to a epoch millisecond time stamp from the creating time),TRUE,TRUE
30 | nmod,mod,notes,"Modification timestamp, epoch seconds",TRUE,TRUE
31 | nmodel,,notes,Name of the model,FALSE,TRUE
32 | nsfld,sfld,notes,Content of the sort field.,TRUE,FALSE
33 | ntags,tags,notes,Tags of the note as list. In the raw anki database tags are saved space joined string.,TRUE,TRUE
34 | nusn,usn,notes,This column (update sequence number) is used to keep track of the sync state of reviews and provides no useful information for analysis.Value of -1 indicates changes that need to be pushed to server. usn < server usn indicates changes that need to be pulled from server.,TRUE,TRUE
35 | rease,ease,revs,"Which button you pushed to score your recall: review:  1 (wrong), 2 (hard), 3 (ok), 4 (easy), learn/relearn:   1 (wrong), 2 (ok), 3 (easy)",TRUE,TRUE
36 | rfactor,factor,revs,"The new ease factor of the card in permille (parts per thousand). If the ease factor is 2500, the card’s interval will be multiplied by 2.5 the next time you press Good.",TRUE,TRUE
37 | rid,id,revs,ID of the review (this corresponds to a epoch millisecond time stamp from the creating time),TRUE,TRUE
38 | rivl,ivl,revs,"The new interval that the card was pushed to after the review. Positive values are in days, negative values are in seconds (for learning cards).",TRUE,TRUE
39 | rlastIvl,lastIvl,revs,Last interval,TRUE,TRUE
40 | rtime,time,revs,"How many milliseconds your review took, up to 60000 (60s)",TRUE,TRUE
41 | rtype,type,revs,"Values: ‘learning’, ‘review’, ‘relearn’, ‘cram’ (cards being studied in a filtered deck when they are not due). In the raw Anki database this is 0 for learning cards, 1 for review cards, 2 for relearn cards, and 3 for cram cards. ",TRUE,TRUE
42 | rusn,usn,revs,This column (update sequence number) is used to keep track of the sync state of reviews and provides no useful information for analysis.Value of -1 indicates changes that need to be pushed to server. usn < server usn indicates changes that need to be pulled from server.,TRUE,TRUE
43 | 


--------------------------------------------------------------------------------
/ankipandas/paths.py:
--------------------------------------------------------------------------------
  1 | """ Convenience functions to find the database and other system locations
  2 | without the user having to specify full paths.
  3 | """
  4 | 
  5 | from __future__ import annotations
  6 | 
  7 | import collections
  8 | import datetime
  9 | 
 10 | # std
 11 | import os
 12 | import shutil
 13 | from functools import lru_cache
 14 | from pathlib import Path, PurePath
 15 | from typing import DefaultDict
 16 | 
 17 | # ours
 18 | from ankipandas.util.log import log
 19 | 
 20 | 
 21 | @lru_cache(32)
 22 | def _find_db(
 23 |     search_path,
 24 |     maxdepth=6,
 25 |     filename="collection.anki2",
 26 |     break_on_first=False,
 27 |     user: str | None = None,
 28 | ) -> DefaultDict[str, list[Path]]:
 29 |     """
 30 |     Like find_database but only for one search path at a time. Also doesn't
 31 |     raise any error, even if the search path doesn't exist.
 32 | 
 33 |     Args:
 34 |         search_path:
 35 |         maxdepth: Maximum depth relative to search_path
 36 |         filename:
 37 |         break_on_first: Break on first search result
 38 |         user: Only search for this user
 39 | 
 40 |     Returns:
 41 |         collection.defaultdict({user: [list of results]})
 42 |     """
 43 |     search_path = Path(search_path)
 44 |     if not search_path.exists():
 45 |         log.debug("_find_db: Search path %r does not exist.", str(search_path))
 46 |         return collections.defaultdict(list)
 47 |     if search_path.is_file():
 48 |         if search_path.name == filename:
 49 |             return collections.defaultdict(
 50 |                 list, {search_path.parent.name: [search_path]}
 51 |             )
 52 |         else:
 53 |             log.warning(
 54 |                 "_find_db: Search path %r is a file, but filename does not "
 55 |                 "match that of %r.",
 56 |                 str(search_path),
 57 |                 filename,
 58 |             )
 59 |             return collections.defaultdict(list)
 60 |     found: DefaultDict[str, list[Path]] = collections.defaultdict(list)
 61 |     for root, dirs, files in os.walk(str(search_path)):
 62 |         if filename in files:
 63 |             _user = os.path.basename(root)
 64 |             if user and not _user == user:
 65 |                 continue
 66 |             found[_user].append(Path(root) / filename)
 67 |             if break_on_first:
 68 |                 log.debug("_find_db: Breaking after first hit.")
 69 |                 break
 70 |         depth = len(Path(root).relative_to(search_path).parts)
 71 |         if maxdepth and depth >= maxdepth:
 72 |             # log.debug(
 73 |             #     "_find_db: Abort search at %r. "
 74 |             #     "Max depth exceeded.",
 75 |             #     str(root)
 76 |             # )
 77 |             del dirs[:]
 78 |     return found
 79 | 
 80 | 
 81 | @lru_cache(32)
 82 | def find_db(
 83 |     search_paths=None,
 84 |     maxdepth=8,
 85 |     filename="collection.anki2",
 86 |     user=None,
 87 |     break_on_first=True,
 88 | ) -> Path:
 89 |     """
 90 |     Find path to anki2 database.
 91 | 
 92 |     Args:
 93 |         search_paths: Search path as string or pathlib object or list/iterable
 94 |             thereof. If None, some search paths are set by default.
 95 |         maxdepth: Maximal search depth.
 96 |         filename: Filename of the collection (default: ``collections.anki2``)
 97 |         user: Username to which the collection belongs. If None, search for
 98 |             databases of any user.
 99 |         break_on_first: Stop searching once a database is found. This is
100 |             obviously faster, but you will not get any errors if there are
101 |             multiple databases matching your criteria.
102 | 
103 |     Raises:
104 |         If none or more than one result is found: :class:`ValueError`
105 | 
106 |     Returns:
107 |         Path to the anki2 database
108 |     """
109 |     if not search_paths:
110 |         log.info(
111 |             "Searching for database. This might take some time. "
112 |             "You can speed this up by specifying a search path or "
113 |             "directly entering the path to your database."
114 |         )
115 |         search_paths = [
116 |             "~/.local/share/Anki2/",
117 |             "~/Documents/Anki2",
118 |             Path(os.getenv("APPDATA", "~") + "/Anki2/"),
119 |             "~/.local/share/Anki2",
120 |             Path.home(),
121 |         ]
122 |         search_paths = [Path(sp).expanduser().resolve() for sp in search_paths]
123 |     if break_on_first:
124 |         log.warning(
125 |             "The search will stop at the first hit, so please verify that "
126 |             "the result is correct (for example in case there might be more "
127 |             "than one Anki installation)"
128 |         )
129 |     if isinstance(search_paths, (str, PurePath)):
130 |         search_paths = [search_paths]
131 |     found: dict[str, list[Path]] = {}
132 |     for search_path in search_paths:
133 |         found = {
134 |             **found,
135 |             **_find_db(
136 |                 search_path,
137 |                 maxdepth=maxdepth,
138 |                 filename=filename,
139 |                 user=user,
140 |                 break_on_first=break_on_first,
141 |             ),
142 |         }
143 |         if break_on_first:
144 |             if user is not None:
145 |                 if user in found:
146 |                     break
147 |             else:
148 |                 if found:
149 |                     break
150 | 
151 |     if user:
152 |         # We were searching for a specific user
153 |         if user not in found:
154 |             raise ValueError(
155 |                 f"Could not find database belonging to user {user}"
156 |             )
157 |         else:
158 |             results_user = found[user]
159 |     else:
160 |         if len(found) >= 2:
161 |             raise ValueError(
162 |                 "Found databases for more than one user: {}. Please specify "
163 |                 "the user.".format(", ".join(found))
164 |             )
165 |         elif not found:
166 |             raise ValueError(
167 |                 "No database found. You might increase the search depth or "
168 |                 "specify search paths to find more."
169 |             )
170 |         else:
171 |             # No user specified but we found only one
172 |             results_user = found.popitem()[1]
173 | 
174 |     if len(results_user) >= 2:
175 |         raise ValueError(
176 |             "Found more than one database belonging to user {} at {}".format(
177 |                 user, ", ".join(map(str, results_user))
178 |             )
179 |         )
180 | 
181 |     assert len(results_user) == 1
182 |     final_result = results_user[0]
183 |     log.debug("Database found at %r.", final_result)
184 |     return final_result
185 | 
186 | 
187 | @lru_cache(32)
188 | def db_path_input(
189 |     path: str | PurePath | None = None, user: str | None = None
190 | ) -> Path:
191 |     """Helper function to interpret user input of path to database.
192 | 
193 |     1. If no path is given, we search through some default locations
194 |     2. If path points to a file: Take that file
195 |     3. If path points to a directory: Search in that directory
196 | 
197 |     Args:
198 |         path: Path to database or search path or None
199 |         user: User name of anki collection or None
200 | 
201 |     Returns:
202 |         Path to anki database as :class:`Path` object
203 | 
204 |     Raises:
205 |         If path does not exist: :class:`FileNotFoundError`
206 |         In various other cases: :class:`ValueError`
207 |     """
208 |     if path is None:
209 |         result = find_db(user=user)
210 |     else:
211 |         path = Path(path)
212 |         if not path.exists():
213 |             raise FileNotFoundError(
214 |                 f"db_path_input: File '{str(path)}' does not exist."
215 |             )
216 |         if path.is_file():
217 |             log.debug(
218 |                 "db_path_input: Database explicitly set to %r.", str(path)
219 |             )
220 |             result = path
221 |         else:
222 |             result = find_db(
223 |                 search_paths=(path,), user=user, break_on_first=False
224 |             )
225 |             log.info("Database found at %r.", str(result))
226 |     if result:
227 |         return result
228 |     else:
229 |         raise ValueError("Database could not be found.")
230 | 
231 | 
232 | def db_backup_file_name() -> str:
233 |     """Time based file name of the backup file."""
234 |     return "backup-ankipandas-{}.anki2".format(
235 |         datetime.datetime.now().strftime("%Y-%m-%d-%H.%M.%S.%f")
236 |     )
237 | 
238 | 
239 | def get_anki_backup_folder(path: str | PurePath, nexist="raise") -> Path:
240 |     """Return path to Anki backup folder.
241 | 
242 |     Args:
243 |         path: Path to Aki database as :class:`Path`
244 |         nexist: What to do if backup folder doesn't seem to exist: ``raise`` or
245 |             ``ignore``.
246 | 
247 |     Returns:
248 |         Path to Anki backup folder as :class:`Path`.
249 |     """
250 |     path = Path(path)
251 |     if not path.is_file():
252 |         raise FileNotFoundError(f"Database path {path} seems to be invalid.")
253 |     backup_folder = path.parent / "backups"
254 |     if nexist == "raise" and not backup_folder.is_dir():
255 |         raise ValueError(
256 |             f"Anki backup folder corresponding to database at {path} doesn't seem"
257 |             " to exist. Perhaps you can specify a custom backup "
258 |             "folder?"
259 |         )
260 |     return backup_folder
261 | 
262 | 
263 | def backup_db(
264 |     db_path: str | PurePath,
265 |     backup_folder: str | PurePath | None = None,
266 | ) -> Path:
267 |     """
268 |     Back up database file.
269 | 
270 |     Args:
271 |         db_path: Path to database
272 |         backup_folder: Path to backup folder. If None is given, the backup is
273 |             created in the Anki backup directory.
274 | 
275 |     Returns:
276 |         Path to newly created backup file as :class:`Path`.
277 |     """
278 |     db_path = Path(db_path)
279 |     if backup_folder:
280 |         backup_folder = Path(backup_folder)
281 |         if not backup_folder.is_dir():
282 |             log.debug("Creating backup directory %s.", backup_folder)
283 |             backup_folder.mkdir(parents=True)
284 |     else:
285 |         backup_folder = get_anki_backup_folder(db_path, nexist="raise")
286 |     if not db_path.is_file():
287 |         raise FileNotFoundError("Database does not seem to exist.")
288 |     backup_path = backup_folder / db_backup_file_name()
289 |     shutil.copy2(str(db_path), str(backup_path))
290 |     return backup_path
291 | 


--------------------------------------------------------------------------------
/ankipandas/raw.py:
--------------------------------------------------------------------------------
  1 | """ These function implement the more direct interactions with the Anki
  2 | database and provide basic functionality that is then used to implement the
  3 | functionality in :class:`~ankipandas.collection.Collection`,
  4 | :class:`ankipandas.ankidf.AnkiDataFrame` etc.
  5 | 
  6 | .. warning::
  7 | 
  8 |     Please only use these function if you know what you are doing, as they
  9 |     come with less consistency checks as the functionality implemented in
 10 |     :class:`~ankipandas.collection.Collection` and
 11 |     :class:`ankipandas.ankidf.AnkiDataFrame`.
 12 |     Also note that the functions here are considered to be internal, i.e. might
 13 |     change without prior notice.
 14 | """
 15 | 
 16 | from __future__ import annotations
 17 | 
 18 | import json
 19 | import pathlib
 20 | import sqlite3
 21 | 
 22 | # std
 23 | from collections import defaultdict
 24 | from functools import lru_cache
 25 | 
 26 | import numpy as np
 27 | 
 28 | # 3rd
 29 | import pandas as pd
 30 | 
 31 | from ankipandas._columns import anki_columns, tables_ours2anki
 32 | 
 33 | # ours
 34 | from ankipandas.util.log import log
 35 | from ankipandas.util.misc import defaultdict2dict, nested_dict
 36 | 
 37 | CACHE_SIZE = 32
 38 | 
 39 | 
 40 | # Open/Close db
 41 | # ==============================================================================
 42 | 
 43 | 
 44 | def load_db(path: str | pathlib.PurePath) -> sqlite3.Connection:
 45 |     """
 46 |     Load database from path.
 47 | 
 48 |     Args:
 49 |         path: String or :class:`pathlib.PurePath`.
 50 | 
 51 |     Returns:
 52 |         :class:`sqlite3.Connection`
 53 |     """
 54 |     path = pathlib.Path(path)
 55 |     if not path.is_file():
 56 |         raise FileNotFoundError(f"Not a file/file not found: {path}")
 57 |     return sqlite3.connect(str(path.resolve()))
 58 | 
 59 | 
 60 | def close_db(db: sqlite3.Connection) -> None:
 61 |     """Close the database.
 62 | 
 63 |     Args:
 64 |         db:  Database (:class:`sqlite3.Connection`)
 65 | 
 66 |     Returns:
 67 |         None
 68 |     """
 69 |     db.close()
 70 | 
 71 | 
 72 | # Basic getters
 73 | # ==============================================================================
 74 | 
 75 | 
 76 | def get_table(db: sqlite3.Connection, table: str) -> pd.DataFrame:
 77 |     """Get raw table from the Anki database.
 78 | 
 79 |     Args:
 80 |         db: Database (:class:`sqlite3.Connection`)
 81 |         table: ``cards``, ``notes`` or ``revs``
 82 | 
 83 |     Returns:
 84 |         :class:`pandas.DataFrame`
 85 |     """
 86 | 
 87 |     df = pd.read_sql_query(f"SELECT * FROM {tables_ours2anki[table]}", db)
 88 |     return df
 89 | 
 90 | 
 91 | def get_empty_table(table: str) -> pd.DataFrame:
 92 |     """Get empty table
 93 | 
 94 |     Args:
 95 |         table: ``cards``, ``notes`` or ``revs``
 96 | 
 97 |     Returns:
 98 |         :class: `pandas.DataFrame`
 99 |     """
100 |     return pd.DataFrame(columns=anki_columns[table])
101 | 
102 | 
103 | def _interpret_json_val(val):
104 |     if isinstance(val, str) and val:
105 |         try:
106 |             return json.loads(val)
107 |         except json.decoder.JSONDecodeError:
108 |             return val
109 |             # msg = (
110 |             #     "AN ERROR OCCURRED WHILE TRYING TO LOAD INFORMATION "
111 |             #     "FROM THE DATABASE. PLEASE COPY THE WHOLE INFORMATION"
112 |             #     " BELOW AND ABOVE AND OPEN A BUG REPORT ON GITHUB!\n\n"
113 |             # )
114 |             # msg += "value to be parsed: {}".format(repr(val))
115 |             # log.critical(msg)
116 |             # raise
117 |     else:
118 |         return val
119 | 
120 | 
121 | def read_info(db: sqlite3.Connection, table_name: str) -> dict:
122 |     """Get a table from the database and return nested dictionary mapping of
123 |     it.
124 | 
125 |     Args:
126 |         db:
127 |         table_name:
128 | 
129 |     Returns:
130 | 
131 |     """
132 |     version = get_db_version(db)
133 |     _df = pd.read_sql_query(f"SELECT * FROM {table_name} ", db)
134 |     if version == 0:
135 |         assert len(_df) == 1, _df
136 |         ret = nested_dict()
137 |         for col in _df.columns:
138 |             ret[col] = _interpret_json_val(_df[col][0])
139 |     elif version == 1:
140 |         ret = nested_dict()
141 |         cols = _df.columns
142 |         # todo: this is a hack, but oh well:
143 |         index_cols = 1
144 |         if len(_df[cols[0]].unique()) != len(_df):
145 |             index_cols = 2
146 |         for row in _df.iterrows():
147 |             row = row[1].to_list()
148 |             if index_cols == 1:
149 |                 for icol in range(1, len(cols)):
150 |                     ret[row[0]][cols[icol]] = _interpret_json_val(row[icol])
151 |             elif index_cols == 2:
152 |                 for icol in range(2, len(cols)):
153 |                     ret[row[0]][row[1]][cols[icol]] = _interpret_json_val(
154 |                         row[icol]
155 |                     )
156 |             else:
157 |                 raise ValueError
158 |     else:
159 |         raise NotImplementedError
160 |     return defaultdict2dict(ret)
161 | 
162 | 
163 | @lru_cache(CACHE_SIZE)
164 | def get_info(db: sqlite3.Connection) -> dict:
165 |     """
166 |     Get all other information from the database, e.g. information about models,
167 |     decks etc.
168 | 
169 |     Args:
170 |         db: Database (:class:`sqlite3.Connection`)
171 | 
172 |     Returns:
173 |         Nested dictionary.
174 |     """
175 |     return read_info(db, "col")
176 | 
177 | 
178 | @lru_cache(CACHE_SIZE)
179 | def get_db_version(db: sqlite3.Connection) -> int:
180 |     """Get "version" of database structure
181 | 
182 |     Args:
183 |         db:
184 | 
185 |     Returns: 0: all info (also for decks and models) was in the 'col' table
186 |         1: separate 'notetypes' and 'decks' tables
187 | 
188 |     """
189 |     _tables = (
190 |         db.cursor()
191 |         .execute(
192 |             "SELECT name FROM sqlite_master "
193 |             "WHERE type ='table' AND name NOT LIKE 'sqlite_%';"
194 |         )
195 |         .fetchall()
196 |     )
197 |     tables = [ret[0] for ret in _tables]
198 |     if "decks" in tables:
199 |         return 1
200 |     else:
201 |         return 0
202 | 
203 | 
204 | # Basic Setters
205 | # ==============================================================================
206 | 
207 | 
208 | def _consolidate_tables(
209 |     df: pd.DataFrame, df_old: pd.DataFrame, mode: str, id_column="id"
210 | ):
211 |     if not list(df.columns) == list(df_old.columns):
212 |         raise ValueError(
213 |             "Columns do not match: Old: {}, New: {}".format(
214 |                 ", ".join(df_old.columns), ", ".join(df.columns)
215 |             )
216 |         )
217 | 
218 |     old_indices = set(df_old[id_column])
219 |     new_indices = set(df[id_column])
220 | 
221 |     # Get indices
222 |     # -----------
223 | 
224 |     if mode == "update":
225 |         indices = set(old_indices)
226 |     elif mode == "append":
227 |         indices = set(new_indices) - set(old_indices)
228 |         if not indices:
229 |             log.warning(
230 |                 "Was told to append to table, but there do not seem to be any"
231 |                 " new entries. "
232 |             )
233 |     elif mode == "replace":
234 |         indices = set(new_indices)
235 |     else:
236 |         raise ValueError(f"Unknown mode '{mode}'.")
237 | 
238 |     df = df[df[id_column].isin(indices)]
239 | 
240 |     # Apply
241 |     # -----
242 | 
243 |     if mode == "update":
244 |         df_new = df_old.copy()
245 |         df_new.update(df)
246 |     elif mode == "append":
247 |         df_new = pd.concat([df_old, df], verify_integrity=True)
248 |     elif mode == "replace":
249 |         df_new = df.copy()
250 |     else:
251 |         raise ValueError(f"Unknown mode '{mode}'.")
252 | 
253 |     return df_new
254 | 
255 | 
256 | # fixme: update mode also can delete things if we do not have all rows
257 | def set_table(
258 |     db: sqlite3.Connection,
259 |     df: pd.DataFrame,
260 |     table: str,
261 |     mode: str,
262 |     id_column="id",
263 | ) -> None:
264 |     """
265 |     Write table back to database.
266 | 
267 |     Args:
268 |         db: Database (:class:`sqlite3.Connection`)
269 |         df: The :class:`pandas.DataFrame` to write
270 |         table: Table to write to: 'notes', 'cards', 'revs'
271 |         mode: 'update': Update all existing entries, 'append': Only append new
272 |             entries, but do not modify, 'replace': Append, modify and delete
273 |         id_column: Column with ID
274 |     Returns:
275 |         None
276 |     """
277 |     df_old = get_table(db, table)
278 |     df_new = _consolidate_tables(
279 |         df=df, df_old=df_old, mode=mode, id_column=id_column
280 |     )
281 |     df_new.to_sql(tables_ours2anki[table], db, if_exists="replace", index=False)
282 | 
283 | 
284 | class NumpyJSONEncoder(json.JSONEncoder):
285 |     """JSON Encoder that support numpy datatypes by converting them to
286 |     built in datatypes."""
287 | 
288 |     def default(self, obj):
289 |         if isinstance(obj, np.integer):
290 |             return int(obj)
291 |         elif isinstance(obj, np.floating):
292 |             return float(obj)
293 |         elif isinstance(obj, np.ndarray):
294 |             return obj.tolist()
295 |         else:
296 |             return super().default(obj)
297 | 
298 | 
299 | def set_info(db: sqlite3.Connection, info: dict) -> None:
300 |     """Write back extra info to database
301 | 
302 |     Args:
303 |         db: Database (:class:`sqlite3.Connection`)
304 |         info: Output of :func:`get_info`
305 | 
306 |     Returns:
307 |         None
308 |     """
309 | 
310 |     def encode_value(value):
311 |         if isinstance(value, (float, np.floating)):
312 |             return value
313 |         elif isinstance(value, (int, np.integer)):
314 |             return value
315 |         elif isinstance(value, str):
316 |             return value
317 |         else:
318 |             return json.dumps(value, cls=NumpyJSONEncoder)
319 | 
320 |     info_json_strings = {
321 |         key: encode_value(value) for key, value in info.items()
322 |     }
323 |     df = pd.DataFrame(info_json_strings, index=[0])
324 |     df.to_sql("col", db, if_exists="replace", index=False)
325 | 
326 | 
327 | def update_note_indices(db: sqlite3.Connection) -> None:
328 |     """Update search indices for 'notes' table. This does not modify any information
329 |     in the table itself.
330 |     See https://github.com/klieret/AnkiPandas/issues/124 for more informationl
331 |     """
332 |     cur = db.cursor()
333 |     cur.execute("CREATE INDEX IF NOT EXISTS idx_notes_mid ON notes (mid)")
334 |     cur.execute("CREATE INDEX IF NOT EXISTS ix_notes_csum on notes (csum)")
335 |     cur.execute("CREATE INDEX IF NOT EXISTS ix_notes_usn on notes (usn)")
336 | 
337 | 
338 | def update_card_indices(db: sqlite3.Connection) -> None:
339 |     """Update search indices for 'cards' table. This does not modify any information
340 |     in the table itself.
341 |     See https://github.com/klieret/AnkiPandas/issues/124 for more informationl
342 |     """
343 |     cur = db.cursor()
344 |     cur.execute(
345 |         "CREATE INDEX IF NOT EXISTS idx_cards_odid ON cards (odid) WHERE odid != 0"
346 |     )
347 |     cur.execute("CREATE INDEX IF NOT EXISTS ix_cards_nid on cards (nid)")
348 |     cur.execute(
349 |         "CREATE INDEX IF NOT EXISTS ix_cards_sched on cards (did, queue, due)"
350 |     )
351 |     cur.execute("CREATE INDEX IF NOT EXISTS ix_cards_usn on cards (usn)")
352 | 
353 | 
354 | # Trivially derived getters
355 | # ==============================================================================
356 | 
357 | # todo: Using decorators here causes the function signatures to be messed up
358 | #  with sphinx but oh well.
359 | 
360 | 
361 | @lru_cache(CACHE_SIZE)
362 | def get_ids(db: sqlite3.Connection, table: str) -> list[int]:
363 |     """Get list of IDs, e.g. note IDs etc.
364 | 
365 |     Args:
366 |         db: Database (:class:`sqlite3.Connection`)
367 |         table: 'revs', 'cards', 'notes'
368 | 
369 |     Returns:
370 |         Nested dictionary
371 |     """
372 |     return get_table(db, table)["id"].astype(int).tolist()
373 | 
374 | 
375 | @lru_cache(CACHE_SIZE)
376 | def get_deck_info(db: sqlite3.Connection) -> dict:
377 |     """Get information about decks.
378 | 
379 |     Args:
380 |         db: Database (:class:`sqlite3.Connection`)
381 | 
382 |     Returns:
383 |         Nested dictionary
384 |     """
385 |     if get_db_version(db) == 0:
386 |         _dinfo = get_info(db)["decks"]
387 |     elif get_db_version(db) == 1:
388 |         _dinfo = read_info(db, "decks")
389 |     else:
390 |         raise NotImplementedError
391 | 
392 |     if not _dinfo:
393 |         return {}
394 |     else:
395 |         return _dinfo
396 | 
397 | 
398 | @lru_cache(CACHE_SIZE)
399 | def get_did2deck(db: sqlite3.Connection) -> dict[int, str]:
400 |     """Mapping of deck IDs (did) to deck names.
401 | 
402 |     Args:
403 |         db: Database (:class:`sqlite3.Connection`)
404 | 
405 |     Returns:
406 |         Dictionary mapping
407 |     """
408 |     dinfo = get_deck_info(db)
409 |     _did2dec = {int(did): dinfo[did]["name"] for did in dinfo}
410 |     return defaultdict(str, _did2dec)
411 | 
412 | 
413 | @lru_cache(CACHE_SIZE)
414 | def get_deck2did(db: sqlite3.Connection) -> dict[str, int]:
415 |     """Mapping of deck names to deck IDs
416 | 
417 |     Args:
418 |         db: Database (:class:`sqlite3.Connection`)
419 | 
420 |     Returns:
421 |         Dictionary mapping of deck id to deck name
422 |     """
423 |     dinfo = get_deck_info(db)
424 |     _did2dec = {dinfo[did]["name"]: int(did) for did in dinfo}
425 |     return defaultdict(int, _did2dec)
426 | 
427 | 
428 | @lru_cache(CACHE_SIZE)
429 | def get_model_info(db: sqlite3.Connection) -> dict:
430 |     """Get information about models.
431 | 
432 |     Args:
433 |         db: Database (:class:`sqlite3.Connection`)
434 | 
435 |     Returns:
436 |         Nested dictionary
437 |     """
438 |     if get_db_version(db) == 0:
439 |         _dinfo = get_info(db)["models"]
440 |     elif get_db_version(db) == 1:
441 |         _dinfo = read_info(db, "notetypes")
442 |     else:
443 |         raise NotImplementedError
444 |     if not _dinfo:
445 |         return {}
446 |     else:
447 |         return {int(key): value for key, value in _dinfo.items()}
448 | 
449 | 
450 | @lru_cache(CACHE_SIZE)
451 | def get_mid2model(db: sqlite3.Connection) -> dict[int, str]:
452 |     """Mapping of model IDs (mid) to model names.
453 | 
454 |     Args:
455 |         db: Database (:class:`sqlite3.Connection`)
456 | 
457 |     Returns:
458 |         Dictionary mapping
459 |     """
460 |     minfo = get_model_info(db)
461 |     _mid2model = {int(mid): minfo[mid]["name"] for mid in minfo}
462 |     return defaultdict(str, _mid2model)
463 | 
464 | 
465 | @lru_cache(CACHE_SIZE)
466 | def get_model2mid(db: sqlite3.Connection) -> dict[str, int]:
467 |     """Mapping of model name to model ID (mid)
468 | 
469 |     Args:
470 |         db: Database (:class:`sqlite3.Connection`)
471 | 
472 |     Returns:
473 |         Dictionary mapping
474 |     """
475 |     minfo = get_model_info(db)
476 |     _mid2model = {minfo[mid]["name"]: int(mid) for mid in minfo}
477 |     return defaultdict(int, _mid2model)
478 | 
479 | 
480 | @lru_cache(CACHE_SIZE)
481 | def get_mid2sortfield(db: sqlite3.Connection) -> dict[int, int]:
482 |     """Mapping of model ID to index of sort field."""
483 |     if get_db_version(db) == 0:
484 |         minfo = get_model_info(db)
485 |         _mid2sortfield = {mid: minfo[mid]["sortf"] for mid in minfo}
486 |         return defaultdict(int, _mid2sortfield)
487 |     else:
488 |         # fixme: Don't know how to retrieve sort field yet
489 |         minfo = get_model_info(db)
490 |         return {mid: 0 for mid in minfo}
491 | 
492 | 
493 | @lru_cache(CACHE_SIZE)
494 | def get_mid2fields(db: sqlite3.Connection) -> dict[int, list[str]]:
495 |     """Get mapping of model ID to field names.
496 | 
497 |     Args:
498 |         db: Database (:class:`sqlite3.Connection`)
499 | 
500 |     Returns:
501 |         Dictionary mapping of model ID (mid) to list of field names.
502 |     """
503 |     if get_db_version(db) == 0:
504 |         minfo = get_model_info(db)
505 |         return {
506 |             int(mid): [flds["name"] for flds in minfo[mid]["flds"]]
507 |             for mid in minfo
508 |         }
509 |     elif get_db_version(db) == 1:
510 |         finfo = read_info(db, "fields")
511 |         mid2fields = {
512 |             int(mid): [finfo[mid][ord]["name"] for ord in finfo[mid]]
513 |             for mid in finfo
514 |         }
515 |         return mid2fields
516 |     else:
517 |         raise NotImplementedError
518 | 
519 | 
520 | @lru_cache(CACHE_SIZE)
521 | def get_mid2templateords(db: sqlite3.Connection) -> dict[int, list[int]]:
522 |     """Get mapping of model ID to available templates ids
523 | 
524 |     Args:
525 |         db:
526 | 
527 |     Returns:
528 | 
529 |     """
530 |     if get_db_version(db) == 0:
531 |         minfo = get_model_info(db)
532 |         return {mid: [x["ord"] for x in minfo[mid]["tmpls"]] for mid in minfo}
533 |     elif get_db_version(db) == 1:
534 |         tinfo = read_info(db, "templates")
535 |         return {int(mid): [int(x) for x in tinfo[mid]] for mid in tinfo}
536 |     else:
537 |         raise NotImplementedError
538 | 
539 | 
540 | @lru_cache(CACHE_SIZE)
541 | def get_cid2nid(db: sqlite3.Connection) -> dict[int, int]:
542 |     """Mapping card ID to note ID.
543 | 
544 |     Args:
545 |         db:  Database (:class:`sqlite3.Connection`)
546 | 
547 |     Returns:
548 |         Dictionary
549 |     """
550 |     cards = get_table(db, "cards")
551 |     _cid2nid = dict(zip(cards["id"], cards["nid"]))
552 |     return defaultdict(int, _cid2nid)
553 | 
554 | 
555 | @lru_cache(CACHE_SIZE)
556 | def get_cid2did(db: sqlite3.Connection) -> dict[int, int]:
557 |     """Mapping card ID to deck ID.
558 | 
559 |     Args:
560 |         db:  Database (:class:`sqlite3.Connection`)
561 | 
562 |     Returns:
563 |         Dictionary
564 |     """
565 |     cards = get_table(db, "cards")
566 |     _cid2did = dict(zip(cards["id"], cards["did"]))
567 |     return defaultdict(int, _cid2did)
568 | 
569 | 
570 | @lru_cache(CACHE_SIZE)
571 | def get_nid2mid(db: sqlite3.Connection) -> dict[int, int]:
572 |     """Mapping note ID to model ID.
573 | 
574 |     Args:
575 |         db:  Database (:class:`sqlite3.Connection`)
576 | 
577 |     Returns:
578 |         Dictionary
579 |     """
580 |     notes = get_table(db, "notes")
581 |     _nid2mid = dict(zip(notes["id"], notes["mid"]))
582 |     return defaultdict(int, _nid2mid)
583 | 


--------------------------------------------------------------------------------
/ankipandas/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/ankipandas/test/__init__.py


--------------------------------------------------------------------------------
/ankipandas/test/data/few_basic_cards/collection.anki2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/ankipandas/test/data/few_basic_cards/collection.anki2


--------------------------------------------------------------------------------
/ankipandas/test/data/few_basic_cards/collection_v1.anki2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/ankipandas/test/data/few_basic_cards/collection_v1.anki2


--------------------------------------------------------------------------------
/ankipandas/test/test_ankidf.py:
--------------------------------------------------------------------------------
   1 | """ Most of the functionality of the AnkiDataFrames is already tested in
   2 | test_core, because that saves to write a lot of duplicate code.
   3 | 
   4 | Everything else is tested here.
   5 | """
   6 | 
   7 | from __future__ import annotations
   8 | 
   9 | import copy
  10 | 
  11 | # std
  12 | import pathlib
  13 | import unittest
  14 | 
  15 | # 3rd
  16 | import numpy as np
  17 | 
  18 | import ankipandas._columns as _columns
  19 | import ankipandas.raw as raw
  20 | from ankipandas._columns import our_columns
  21 | 
  22 | # ours
  23 | from ankipandas.ankidf import AnkiDataFrame as AnkiDF
  24 | from ankipandas.collection import Collection
  25 | from ankipandas.util.log import set_debug_log_level
  26 | 
  27 | 
  28 | class TestAnkiDF(unittest.TestCase):
  29 |     db_path = (
  30 |         pathlib.Path(__file__).parent
  31 |         / "data"
  32 |         / "few_basic_cards"
  33 |         / "collection.anki2"
  34 |     )
  35 | 
  36 |     def setUp(self):
  37 |         set_debug_log_level()
  38 |         self.db = raw.load_db(self.db_path)
  39 | 
  40 |         self.col = Collection(self.db_path)
  41 | 
  42 |         # Do not modify this one!
  43 |         self.notes = self.col.notes
  44 |         self.cards = self.col.cards
  45 |         self.revs = self.col.revs
  46 |         self.table2adf = {
  47 |             "notes": self.notes,
  48 |             "cards": self.cards,
  49 |             "revs": self.revs,
  50 |         }
  51 |         self.adfs = [self.notes, self.cards, self.revs]
  52 | 
  53 |         self.empty_notes = self.col.empty_notes()
  54 |         self.empty_cards = self.col.empty_cards()
  55 |         self.empty_revs = self.col.empty_revs()
  56 | 
  57 |     def nnotes(self):
  58 |         return self.notes.copy()
  59 | 
  60 |     def ncards(self):
  61 |         return self.cards.copy()
  62 | 
  63 |     def nrevs(self):
  64 |         return self.revs.copy()
  65 | 
  66 |     def nenotes(self):
  67 |         return self.empty_notes.copy()
  68 | 
  69 |     def necards(self):
  70 |         return self.empty_cards.copy()
  71 | 
  72 |     def nerevs(self):
  73 |         return self.empty_revs.copy()
  74 | 
  75 |     def ntable(self, table):
  76 |         if table == "notes":
  77 |             return self.nnotes()
  78 |         elif table == "cards":
  79 |             return self.ncards()
  80 |         elif table == "revs":
  81 |             return self.nrevs()
  82 |         else:
  83 |             raise ValueError("Unknown table")
  84 | 
  85 |     def ntable2adf(self):
  86 |         return {
  87 |             "notes": self.nnotes(),
  88 |             "cards": self.ncards(),
  89 |             "revs": self.nrevs(),
  90 |         }
  91 | 
  92 |     def nadfs(self):
  93 |         return [self.nnotes(), self.ncards(), self.nrevs()]
  94 | 
  95 |     # Test constructors
  96 |     # ==========================================================================
  97 | 
  98 |     def test_empty(self):
  99 |         eadfs = {
 100 |             "notes": self.col.empty_notes(),
 101 |             "cards": self.col.empty_cards(),
 102 |             "revs": self.col.empty_revs(),
 103 |         }
 104 |         for table, eadf in eadfs.items():
 105 |             self.assertEqual(len(eadf), 0)
 106 |             adf = self.table2adf[table]
 107 |             self.assertListEqual(sorted(adf.columns), sorted(eadf.columns))
 108 | 
 109 |     def test_tags(self):
 110 |         self.assertListEqual(
 111 |             list(self.notes.query("index==1555579337683")["ntags"].values)[0],
 112 |             ["other_test_tag"],
 113 |         )
 114 |         self.assertListEqual(
 115 |             list(self.notes.query("index==1555579352896")["ntags"].values)[0],
 116 |             ["some_test_tag"],
 117 |         )
 118 | 
 119 |     def test_cards(self):
 120 |         cards = self.cards
 121 |         self.assertGreater(len(cards), 11)
 122 |         self.assertEqual(sorted(cards.columns), sorted(our_columns["cards"]))
 123 | 
 124 |     def test_notes(self):
 125 |         notes = self.notes
 126 |         self.assertGreater(len(notes), 6)
 127 |         self.assertEqual(sorted(notes.columns), sorted(our_columns["notes"]))
 128 | 
 129 |     def test_get_revs(self):
 130 |         revs = self.revs
 131 |         self.assertEqual(sorted(revs.columns), sorted(our_columns["revs"]))
 132 |         self.assertGreater(len(revs), 4)
 133 | 
 134 |     # Test merging
 135 |     # ==========================================================================
 136 | 
 137 |     def test_merge_notes_cards(self):
 138 |         merged = self.ncards().merge_notes()
 139 |         self.assertListEqual(
 140 |             sorted(merged.columns),
 141 |             sorted(set(our_columns["cards"]) | set(our_columns["notes"])),
 142 |         )
 143 | 
 144 |     def test_merge_notes_revs(self):
 145 |         merged = self.nrevs().merge_notes()
 146 |         self.assertListEqual(
 147 |             sorted(merged.columns),
 148 |             sorted(
 149 |                 # Note: 'nid' is not a notes column.
 150 |                 set(our_columns["revs"])
 151 |                 | set(our_columns["notes"])
 152 |                 | {"nid"}
 153 |             ),
 154 |         )
 155 | 
 156 |     def test_merge_notes_raises(self):
 157 |         with self.assertRaises(ValueError):
 158 |             self.nnotes().merge_notes()
 159 | 
 160 |     def test_merge_cards(self):
 161 |         merged = self.nrevs().merge_cards()
 162 |         self.assertListEqual(
 163 |             sorted(merged.columns),
 164 |             sorted(set(our_columns["revs"]) | set(our_columns["cards"])),
 165 |         )
 166 | 
 167 |     def test_merge_cards_raises(self):
 168 |         with self.assertRaises(ValueError):
 169 |             self.ncards().merge_cards()
 170 |         with self.assertRaises(ValueError):
 171 |             self.nnotes().merge_cards()
 172 | 
 173 |     # Test properties
 174 |     # ==========================================================================
 175 | 
 176 |     # Trivial cases
 177 |     # --------------------------------------------------------------------------
 178 | 
 179 |     def test_nids_notes(self):
 180 |         self.assertListEqual(list(self.notes.index), list(self.notes.nid))
 181 |         self.assertListEqual(
 182 |             list(self.notes.index),
 183 |             list(raw.get_table(self.db, "notes")["id"].unique()),
 184 |         )
 185 |         self.assertEqual(len(self.notes.nid.unique()), len(self.notes.nid))
 186 | 
 187 |     def test_cids_cards(self):
 188 |         self.assertListEqual(list(self.cards.index), list(self.cards.cid))
 189 |         self.assertListEqual(
 190 |             list(self.cards.index),
 191 |             list(raw.get_table(self.db, "cards")["id"].unique()),
 192 |         )
 193 |         self.assertEqual(len(self.cards.cid.unique()), len(self.cards.cid))
 194 | 
 195 |     def test_rids_revs(self):
 196 |         self.assertListEqual(list(self.revs.index), list(self.revs.rid))
 197 |         self.assertListEqual(
 198 |             list(self.revs.index),
 199 |             list(raw.get_table(self.db, "revs")["id"].unique()),
 200 |         )
 201 |         self.assertEqual(len(self.revs.rid.unique()), len(self.revs.rid))
 202 | 
 203 |     # Slightly more elaborate cases
 204 |     # --------------------------------------------------------------------------
 205 | 
 206 |     def test_nids_cards(self):
 207 |         self.assertListEqual(
 208 |             sorted(list(self.cards.nid.unique())),
 209 |             sorted(list(self.notes.nid.unique())),
 210 |         )
 211 | 
 212 |     def test_nids_revs(self):
 213 |         self.assertTrue(
 214 |             set(self.notes.nid.unique()).issuperset(set(self.revs.nid.unique()))
 215 |         )
 216 | 
 217 |     def test_nids_nexist(self):
 218 |         nexist = AnkiDF()
 219 |         with self.assertRaises(ValueError):
 220 |             # noinspection PyStatementEffect
 221 |             nexist.nid
 222 | 
 223 |     def test_cids_revs(self):
 224 |         self.assertTrue(
 225 |             set(self.revs.cid.unique()).issubset(set(self.cards.cid.unique()))
 226 |         )
 227 | 
 228 |     def test_cids_notes(self):
 229 |         with self.assertRaises(ValueError):
 230 |             # noinspection PyStatementEffect
 231 |             self.notes.cid
 232 | 
 233 |     # --------------------------------------------------------------------------
 234 | 
 235 |     def test_mids(self):
 236 |         mids2s = {
 237 |             "notes": set(self.notes.mid),
 238 |             "cards": set(self.cards.mid),
 239 |             "revs": set(self.revs.mid),
 240 |         }
 241 |         mids = set(raw.get_mid2model(self.db))
 242 |         for table, mids2 in mids2s.items():
 243 |             with self.subTest(table=table):
 244 |                 self.assertTrue(mids2.issubset(mids))
 245 | 
 246 |     def test_dids(self):
 247 |         did2s = {"cards": set(self.cards.did), "revs": set(self.revs.did)}
 248 |         dids = set(raw.get_did2deck(self.db))
 249 |         for table, dids2 in did2s.items():
 250 |             with self.subTest(table=table):
 251 |                 self.assertTrue(dids2.issubset(dids))
 252 | 
 253 |     # ==========================================================================
 254 | 
 255 |     def test_fields_as_columns(self):
 256 |         notes = self.nnotes()
 257 |         notes = notes.fields_as_columns()
 258 |         cols = our_columns["notes"].copy()
 259 |         cols.remove("nflds")
 260 |         prefix = notes.fields_as_columns_prefix
 261 |         new_cols = [prefix + item for item in ["Front", "Back"]]
 262 |         self.assertEqual(sorted(notes.columns), sorted(cols + new_cols))
 263 | 
 264 |     def test_fields_as_columns_x2(self):
 265 |         notes = self.nnotes()
 266 |         notes = notes.fields_as_columns()
 267 |         notes2 = notes.fields_as_columns()
 268 |         self.assertTrue(notes.equals(notes2))
 269 | 
 270 |     def test_fields_as_list(self):
 271 |         # Add fields as column, remove original 'flds' column, then
 272 |         # add it back from the field columns and see if things still check
 273 |         # out
 274 |         notes = self.nnotes()
 275 |         flds = copy.copy(notes["nflds"].values)
 276 |         notes = notes.fields_as_columns().fields_as_list()
 277 |         self.assertEqual(list(flds), list(notes["nflds"].values))
 278 |         self.assertListEqual(
 279 |             sorted(notes.columns), sorted(our_columns["notes"])
 280 |         )
 281 | 
 282 |     def test_fields_as_list_x2(self):
 283 |         notes = self.nnotes()
 284 |         notes2 = notes.fields_as_list()
 285 |         self.assertTrue(notes.equals(notes2))
 286 | 
 287 |     # Convenience
 288 |     # ==========================================================================
 289 | 
 290 |     def test_list_decks(self):
 291 |         decks = self.cards.list_decks()
 292 |         self.assertTrue(set(decks).issuperset({"Testing", "EnglishGerman"}))
 293 | 
 294 |     def test_list_models(self):
 295 |         models = self.notes.list_models()
 296 |         self.assertTrue(
 297 |             set(models).issuperset({"Basic", "Basic (and reversed card)"})
 298 |         )
 299 | 
 300 |     # Properties
 301 |     # ==========================================================================
 302 | 
 303 |     def test_prop_nid(self):
 304 |         notes, cards, revs = self.nadfs()
 305 |         with self.assertRaises(ValueError):
 306 |             notes.nid = ""
 307 |         cards.nid = "a"
 308 |         revs.nid = "a"
 309 |         # noinspection PyUnresolvedReferences
 310 |         self.assertEqual(cards.nid.unique().tolist(), ["a"])
 311 |         # noinspection PyUnresolvedReferences
 312 |         self.assertEqual(revs.nid.unique().tolist(), ["a"])
 313 | 
 314 |     def test_prop_cid(self):
 315 |         notes, cards, revs = self.nadfs()
 316 |         with self.assertRaises(ValueError):
 317 |             cards.cid = ""
 318 |         with self.assertRaises(ValueError):
 319 |             notes.cid = ""
 320 |         revs.cid = "a"
 321 |         self.assertEqual(revs["cid"].unique().tolist(), ["a"])
 322 | 
 323 |     def test_prop_rid(self):
 324 |         notes, cards, revs = self.nadfs()
 325 |         with self.assertRaises(ValueError):
 326 |             revs.rid = ""
 327 |         with self.assertRaises(ValueError):
 328 |             cards.rid = ""
 329 |         with self.assertRaises(ValueError):
 330 |             notes.rid = ""
 331 | 
 332 |     # Tags
 333 |     # ==========================================================================
 334 | 
 335 |     def test_list_tags(self):
 336 |         tags = self.notes.list_tags()
 337 |         self.assertTrue(set(tags).issuperset(["adjective", "noun"]))
 338 | 
 339 |     def test_remove_tags(self):
 340 |         notes = self.nnotes()
 341 |         notes = notes.remove_tag(None)
 342 |         self.assertEqual(list({tuple(x) for x in notes["ntags"]}), [()])
 343 | 
 344 |     def test_add_tags(self):
 345 |         notes = self.nnotes().remove_tag(None).add_tag("1145")
 346 |         self.assertListEqual(
 347 |             list({tuple(x) for x in notes["ntags"]}), [("1145",)]
 348 |         )
 349 |         notes.add_tag("abc", inplace=True)
 350 |         self.assertListEqual(
 351 |             list({tuple(x) for x in notes["ntags"]}), [("1145", "abc")]
 352 |         )
 353 |         notes.add_tag(["abc", "def"], inplace=True)
 354 |         self.assertListEqual(
 355 |             list({tuple(x) for x in notes["ntags"]}), [("1145", "abc", "def")]
 356 |         )
 357 |         notes.add_tag([], inplace=True)
 358 |         self.assertListEqual(
 359 |             list({tuple(x) for x in notes["ntags"]}), [("1145", "abc", "def")]
 360 |         )
 361 | 
 362 |     def test_has_tag(self):
 363 |         notes = self.nnotes().remove_tag(None).add_tag("1145")
 364 |         self.assertListEqual(list(notes.has_tag("1145").unique()), [True])
 365 |         self.assertListEqual(list(notes.has_tag("asdf").unique()), [False])
 366 |         self.assertListEqual(list(notes.has_tag().unique()), [True])
 367 |         self.assertListEqual(
 368 |             list(notes.has_tag(["asdf", "1145"]).unique()), [True]
 369 |         )
 370 | 
 371 |     def test_has_tag_natural(self):
 372 |         notes = self.notes
 373 |         self.assertListEqual(
 374 |             sorted(list(notes.has_tag(["some_test_tag"]).unique())),
 375 |             [False, True],
 376 |         )
 377 | 
 378 |     def test_has_tags(self):
 379 |         notes = self.nnotes().remove_tag(None).add_tag("1145")
 380 |         self.assertListEqual(list(notes.has_tags("1145").unique()), [True])
 381 |         self.assertListEqual(list(notes.has_tags("asdf").unique()), [False])
 382 |         self.assertListEqual(list(notes.has_tags().unique()), [True])
 383 |         self.assertListEqual(
 384 |             list(notes.has_tags(["asdf", "1145"]).unique()), [False]
 385 |         )
 386 |         notes = notes.add_tag("asdf")
 387 |         self.assertListEqual(
 388 |             list(notes.has_tags(["asdf", "1145"]).unique()), [True]
 389 |         )
 390 | 
 391 |     def test_remove_tag(self):
 392 |         notes = self.nnotes().add_tag(["1145", "asdf"])
 393 |         notes.remove_tag("1145", inplace=True)
 394 |         self.assertListEqual(list(notes.has_tag(["1145"]).unique()), [False])
 395 |         self.assertListEqual(list(notes.has_tag(["asdf"]).unique()), [True])
 396 | 
 397 |     # Changes
 398 |     # ==========================================================================
 399 | 
 400 |     def test_show_modification_unchanged(self):
 401 |         for table in ["cards", "revs", "notes"]:
 402 |             with self.subTest(table=table):
 403 |                 adf = self.table2adf[table]
 404 |                 self.assertEqual(np.sum(~adf.was_modified()), len(adf))
 405 |                 self.assertEqual(np.sum(~adf.was_added()), len(adf))
 406 |                 self.assertEqual(len(adf.was_deleted()), 0)
 407 |                 self.assertEqual(np.sum(~adf.was_modified(adf)), len(adf))
 408 |                 self.assertEqual(np.sum(~adf.was_added(adf)), len(adf))
 409 |                 self.assertEqual(len(adf.was_deleted(adf)), 0)
 410 | 
 411 |     def test_show_modification_empty(self):
 412 |         for table in ["cards", "revs", "notes", "notes_cols"]:
 413 |             with self.subTest(table=table):
 414 |                 if table == "notes_cols":
 415 |                     adf = self.ntable("notes")
 416 |                 else:
 417 |                     adf = self.ntable(table)
 418 |                 adf_old = adf.copy()
 419 |                 if table == "notes_cols":
 420 |                     adf.fields_as_columns(inplace=True)
 421 |                 adf["new_col"] = "blargh"
 422 |                 n = len(adf)
 423 |                 adf = adf.drop(adf.index)
 424 |                 self.assertEqual(np.sum(~adf.was_modified()), 0)
 425 |                 self.assertEqual(np.sum(~adf.was_added()), 0)
 426 |                 self.assertEqual(len(adf.was_deleted()), n)
 427 |                 self.assertEqual(np.sum(~adf.was_modified(adf_old)), 0)
 428 |                 self.assertEqual(np.sum(~adf.was_added(adf_old)), 0)
 429 |                 self.assertEqual(len(adf.was_deleted(adf_old)), n)
 430 | 
 431 |     def test_show_modification_all_modified(self):
 432 |         for table in ["cards", "revs", "notes", "notes_cols"]:
 433 |             with self.subTest(table=table):
 434 |                 if table == "notes_cols":
 435 |                     adf = self.ntable("notes")
 436 |                 else:
 437 |                     adf = self.ntable(table)
 438 |                 adf_old = adf.copy()
 439 |                 if table == "notes_cols":
 440 |                     adf.fields_as_columns(inplace=True)
 441 |                 adf[adf.columns[2]] = "changed!"
 442 |                 self.assertEqual(np.sum(~adf.was_modified()), 0)
 443 |                 self.assertEqual(np.sum(adf.was_added()), 0)
 444 |                 self.assertEqual(len(adf.was_deleted()), 0)
 445 |                 # ----
 446 |                 self.assertEqual(len(adf.modified_columns(only=True)), len(adf))
 447 |                 self.assertEqual(
 448 |                     len(adf.modified_columns(only=False)), len(adf)
 449 |                 )
 450 |                 self.assertEqual(
 451 |                     list(adf.modified_columns().loc[adf.index[0]]).index(True),
 452 |                     2,
 453 |                 )
 454 |                 # ----
 455 |                 self.assertEqual(np.sum(~adf.was_modified(adf_old)), 0)
 456 |                 self.assertEqual(np.sum(adf.was_added(adf_old)), 0)
 457 |                 self.assertEqual(len(adf.was_deleted(adf_old)), 0)
 458 |                 # ----
 459 |                 self.assertEqual(
 460 |                     len(adf.modified_columns(only=True, other=adf_old)),
 461 |                     len(adf),
 462 |                 )
 463 |                 self.assertEqual(
 464 |                     len(adf.modified_columns(only=False, other=adf_old)),
 465 |                     len(adf),
 466 |                 )
 467 |                 self.assertEqual(
 468 |                     list(
 469 |                         adf.modified_columns(other=adf_old).loc[adf.index[0]]
 470 |                     ).index(True),
 471 |                     2,
 472 |                 )
 473 | 
 474 |     def test_show_modification_some_modified(self):
 475 |         for table in ["cards", "revs", "notes", "notes_cols"]:
 476 |             with self.subTest(table=table):
 477 |                 if table == "notes_cols":
 478 |                     adf = self.ntable("notes")
 479 |                 else:
 480 |                     adf = self.ntable(table)
 481 |                 adf_old = adf.copy()
 482 |                 if table == "notes_cols":
 483 |                     adf.fields_as_columns(inplace=True)
 484 |                 adf.loc[adf.index[0], [adf.columns[2]]] = "changed!"
 485 |                 self.assertEqual(np.sum(adf.was_modified()), 1)
 486 |                 self.assertEqual(adf.was_modified().tolist()[0], True)
 487 |                 self.assertEqual(np.sum(adf.was_added()), 0)
 488 |                 self.assertEqual(len(adf.was_deleted()), 0)
 489 |                 # ----
 490 |                 self.assertEqual(len(adf.modified_columns(only=True)), 1)
 491 |                 self.assertEqual(
 492 |                     len(adf.modified_columns(only=False)), len(adf)
 493 |                 )
 494 |                 self.assertEqual(
 495 |                     list(adf.modified_columns().loc[adf.index[0]]).index(True),
 496 |                     2,
 497 |                 )
 498 |                 # ----
 499 |                 self.assertEqual(np.sum(adf.was_modified(adf_old)), 1)
 500 |                 self.assertEqual(adf.was_modified(adf_old).tolist()[0], True)
 501 |                 self.assertEqual(np.sum(adf.was_added(adf_old)), 0)
 502 |                 self.assertEqual(len(adf.was_deleted(adf_old)), 0)
 503 |                 # ----
 504 |                 self.assertEqual(
 505 |                     len(adf.modified_columns(only=True, other=adf_old)), 1
 506 |                 )
 507 |                 self.assertEqual(
 508 |                     len(adf.modified_columns(only=False, other=adf_old)),
 509 |                     len(adf),
 510 |                 )
 511 |                 self.assertEqual(
 512 |                     list(
 513 |                         adf.modified_columns(other=adf_old).loc[adf.index[0]]
 514 |                     ).index(True),
 515 |                     2,
 516 |                 )
 517 | 
 518 |     # Formats
 519 |     # ==========================================================================
 520 | 
 521 |     def test_reformat_trivial(self):
 522 |         for table in ["notes", "revs", "cards"]:
 523 |             with self.subTest(table=table):
 524 |                 adf = self.ntable(table)
 525 |                 adf2 = adf.normalize()
 526 |                 self.assertTrue(adf.equals(adf2))
 527 | 
 528 |     def test_convert_raw_load_raw(self):
 529 |         for table in ["notes", "revs", "cards"]:
 530 |             with self.subTest(table=table):
 531 |                 adf = self.ntable(table).raw()
 532 |                 df = raw.get_table(self.db, table)
 533 |                 if table == "notes":
 534 |                     df["tags"] = df["tags"].str.strip()
 535 |                 self.assertTrue(adf.equals(df))
 536 | 
 537 |     def test_raw_normalize(self):
 538 |         for table in ["notes", "revs", "cards"]:
 539 |             with self.subTest(table=table):
 540 |                 adf = self.ntable(table)
 541 |                 adf2 = adf.raw().normalize()
 542 |                 self.assertTrue(adf.equals(adf2))
 543 | 
 544 |     # Update modification stamps
 545 |     # ==========================================================================
 546 | 
 547 |     def test_set_usn(self):
 548 |         for table in ["notes", "revs", "cards"]:
 549 |             with self.subTest(table=table):
 550 |                 adf = self.ntable(table)
 551 |                 print(adf.columns)
 552 |                 adf[table[0] + "usn"] = 999
 553 |                 adf_old = adf.copy()
 554 |                 adf.loc[adf.index[0], adf_old.columns[0]] = "definitely changed"
 555 |                 adf._set_usn()
 556 |                 self.assertEqual(
 557 |                     adf.loc[
 558 |                         adf.index[0], _columns.columns_anki2ours[table]["usn"]
 559 |                     ],
 560 |                     -1,
 561 |                 )
 562 | 
 563 |     def test_set_mod(self):
 564 |         for table in ["notes", "cards"]:
 565 |             with self.subTest(table=table):
 566 |                 adf = self.ntable(table)
 567 |                 adf_old = adf.copy()
 568 |                 adf.loc[adf.index[0], adf.columns[0]] = "definitely changed"
 569 |                 adf._set_mod()
 570 |                 val1 = adf.loc[
 571 |                     adf.index[0], _columns.columns_anki2ours[table]["mod"]
 572 |                 ]
 573 |                 val_rest_1 = adf.loc[
 574 |                     adf.index[1:], _columns.columns_anki2ours[table]["mod"]
 575 |                 ]
 576 |                 val2 = adf_old.loc[
 577 |                     adf.index[0], _columns.columns_anki2ours[table]["mod"]
 578 |                 ]
 579 |                 val_rest_2 = adf.loc[
 580 |                     adf.index[1:], _columns.columns_anki2ours[table]["mod"]
 581 |                 ]
 582 |                 self.assertFalse(val1 == val2)
 583 |                 self.assertListEqual(list(val_rest_1), list(val_rest_2))
 584 | 
 585 |     # New
 586 |     # ==========================================================================
 587 | 
 588 |     # Add cards
 589 |     # --------------------------------------------------------------------------
 590 | 
 591 |     @staticmethod
 592 |     def _cards_dict(card):
 593 |         return dict(
 594 |             nid=card["nid"],
 595 |             cdeck=card["cdeck"],
 596 |             cord=card["cord"],
 597 |             cmod=card["cmod"],
 598 |             cusn=card["cusn"],
 599 |             cqueue=card["cqueue"],
 600 |             ctype=card["ctype"],
 601 |             civl=card["civl"],
 602 |             cfactor=card["cfactor"],
 603 |             clapses=card["clapses"],
 604 |             cleft=card["cleft"],
 605 |             cdue=card["cdue"],
 606 |         )
 607 | 
 608 |     def _test_new_card_default_values(self, cards, **kwargs):
 609 |         self.assertEqual(cards["cusn"].unique().tolist(), [-1])
 610 |         self.assertEqual(cards["cqueue"].unique().tolist(), ["new"])
 611 |         self.assertEqual(cards["ctype"].unique().tolist(), ["learning"])
 612 |         self.assertEqual(cards["civl"].unique().tolist(), [0])
 613 |         self.assertEqual(cards["cfactor"].unique().tolist(), [0])
 614 |         self.assertEqual(cards["creps"].unique().tolist(), [0])
 615 |         self.assertEqual(cards["cleft"].unique().tolist(), [0])
 616 |         for key, value in kwargs.items():
 617 |             self.assertEqual(cards[key].unique().tolist(), [value])
 618 | 
 619 |     def test_new_cards_default_values(self):
 620 |         empty = self.necards()
 621 | 
 622 |         nid1 = 1555579352896
 623 |         nid2 = 1557223191575
 624 |         nids = [nid1, nid2]
 625 |         deck = list(raw.get_did2deck(self.db).values())[0]
 626 | 
 627 |         kwargs = dict(cdeck=deck)
 628 | 
 629 |         with self.subTest(type="default values single note"):
 630 |             self._test_new_card_default_values(
 631 |                 empty.add_card(nid1, deck), **kwargs
 632 |             )
 633 |         with self.subTest(type="default values single card"):
 634 |             self._test_new_card_default_values(
 635 |                 empty.add_card(nid1, deck, cord=0), **kwargs, cord=0
 636 |             )
 637 |         with self.subTest(type="default values several notes"):
 638 |             self._test_new_card_default_values(
 639 |                 empty.add_cards(nids, deck), **kwargs
 640 |             )
 641 |         with self.subTest(type="default values several notes one cord"):
 642 |             self._test_new_card_default_values(
 643 |                 empty.add_cards(nids, deck, cord=0), **kwargs, cord=0
 644 |             )
 645 | 
 646 |     def test_new_cards_raises_missing_nid(self):
 647 |         empty = self.necards()
 648 |         nids = [1555579352896, -15, -16]
 649 |         with self.assertRaises(ValueError) as context:
 650 |             empty.add_cards(nids, "Default")
 651 |         self.assertTrue("-15" in str(context.exception))
 652 |         self.assertTrue("-16" in str(context.exception))
 653 |         self.assertFalse("1555579352896" in str(context.exception))
 654 | 
 655 |     def test_new_cards_raises_inconsistent_model(self):
 656 |         empty = self.necards()
 657 |         nids = [1555579352896, 1555579337683]
 658 |         with self.assertRaises(ValueError) as context:
 659 |             empty.add_cards(nids, "Default")
 660 |         self.assertTrue("for notes of the same model" in str(context.exception))
 661 | 
 662 |     def test_new_cards_raises_missing_deck(self):
 663 |         empty = self.necards()
 664 |         nids = [1555579352896]
 665 |         deck = "not existing for sure"
 666 |         with self.assertRaises(ValueError) as context:
 667 |             empty.add_cards(nids, deck)
 668 |         self.assertTrue(deck in str(context.exception))
 669 | 
 670 |     def test_new_cards_raises_due_default_not_new(self):
 671 |         empty = self.necards()
 672 |         nids = [1555579352896]
 673 |         deck = list(raw.get_did2deck(self.db).values())[0]
 674 |         with self.assertRaises(ValueError) as context:
 675 |             empty.add_cards(nids, deck, cqueue="learning")
 676 |         self.assertTrue("Due date can only be set" in str(context.exception))
 677 | 
 678 |     def test_new_card_fully_specified(self):
 679 |         empty = self.necards()
 680 |         empty_combined = self.necards()
 681 | 
 682 |         # Careful: Need notes of the same model!
 683 |         nid1 = 1555579352896
 684 |         nid2 = 1557223191575
 685 |         deck1 = list(raw.get_did2deck(self.db).values())[0]
 686 |         deck2 = list(raw.get_did2deck(self.db).values())[1]
 687 | 
 688 |         init_dict1 = dict(
 689 |             nid=nid1,
 690 |             cdeck=deck1,
 691 |             cord=0,
 692 |             cmod=123,
 693 |             cusn=5,
 694 |             cqueue="learning",
 695 |             ctype="relearn",
 696 |             civl=5,
 697 |             cfactor=17,
 698 |             clapses=89,
 699 |             cleft=15,
 700 |             cdue=178,
 701 |         )
 702 |         init_dict2 = dict(
 703 |             nid=nid2,
 704 |             cdeck=deck2,
 705 |             cord=0,
 706 |             cmod=1123,
 707 |             cusn=15,
 708 |             cqueue="due",
 709 |             ctype="review",
 710 |             civl=15,
 711 |             cfactor=117,
 712 |             clapses=189,
 713 |             cleft=115,
 714 |             cdue=1178,
 715 |         )
 716 |         init_dict_combined = dict(
 717 |             nid=[nid1, nid2],
 718 |             cdeck=[deck1, deck2],
 719 |             cord=0,
 720 |             cmod=[123, 1123],
 721 |             cusn=[5, 15],
 722 |             cqueue=["learning", "due"],
 723 |             ctype=["relearn", "review"],
 724 |             civl=[5, 15],
 725 |             cfactor=[17, 117],
 726 |             clapses=[89, 189],
 727 |             cleft=[15, 115],
 728 |             cdue=[178, 1178],
 729 |         )
 730 | 
 731 |         cid1 = empty.add_card(**init_dict1, inplace=True)[0]
 732 |         card1 = empty.loc[cid1]
 733 |         cid2 = empty.add_card(**init_dict2, inplace=True)[0]
 734 |         card2 = empty.loc[cid2]
 735 | 
 736 |         cid1, cid2 = empty_combined.add_cards(
 737 |             **init_dict_combined, inplace=True
 738 |         )
 739 |         card1c = empty_combined.loc[cid1]
 740 |         card2c = empty_combined.loc[cid2]
 741 | 
 742 |         self.assertDictEqual(init_dict1, self._cards_dict(card1))
 743 |         self.assertDictEqual(init_dict2, self._cards_dict(card2))
 744 |         self.assertDictEqual(init_dict1, self._cards_dict(card1c))
 745 |         self.assertDictEqual(init_dict2, self._cards_dict(card2c))
 746 | 
 747 |         self.assertEqual(len(empty), 2)
 748 |         self.assertEqual(len(empty_combined), 2)
 749 | 
 750 |     def test_new_cards_vs_new_card(self):
 751 |         # Also done in test_new_card_fully_specified
 752 | 
 753 |         empty = self.necards()
 754 |         empty2 = self.necards()
 755 | 
 756 |         nid = list(raw.get_nid2mid(self.db))[0]
 757 |         deck = list(raw.get_did2deck(self.db).values())[0]
 758 | 
 759 |         init_dict2 = dict(
 760 |             nid=[nid],
 761 |             cdeck=deck,
 762 |             cord=0,
 763 |             cmod=123,
 764 |             cusn=5,
 765 |             cqueue="learning",
 766 |             ctype="relearn",
 767 |             civl=5,
 768 |             cfactor=17,
 769 |             clapses=89,
 770 |             cleft=15,
 771 |             cdue=178,
 772 |         )
 773 |         init_dict1 = copy.deepcopy(init_dict2)
 774 |         init_dict1["nid"] = nid
 775 | 
 776 |         cids = empty2.add_cards(**init_dict2, inplace=True)
 777 |         card2 = empty2.loc[cids[0]]
 778 | 
 779 |         cid = empty.add_card(**init_dict1, inplace=True)[0]
 780 |         card1 = empty.loc[cid]
 781 | 
 782 |         self.assertDictEqual(self._cards_dict(card2), self._cards_dict(card1))
 783 | 
 784 |     # Add notes
 785 |     # --------------------------------------------------------------------------
 786 | 
 787 |     def test_new_notes_raises_inconsistent(self):
 788 |         with self.assertRaises(ValueError):
 789 |             self.nnotes().add_notes("Basic", [["1", "2"]], ntags=[["1"], ["2"]])
 790 |         with self.assertRaises(ValueError):
 791 |             self.nnotes().add_notes("Basic", [["1", "2"]], nid=[123, 124])
 792 |         with self.assertRaises(ValueError):
 793 |             self.nnotes().add_notes("Basic", [["1", "2"]], nguid=[123, 124])
 794 | 
 795 |     def test_new_notes_raises_nid_clash(self):
 796 |         with self.assertRaises(ValueError):
 797 |             self.nnotes().add_note("Basic", ["11", "12"], nid=10).add_note(
 798 |                 "Basic", ["21", "22"], nid=10
 799 |             )
 800 |         with self.assertRaises(ValueError):
 801 |             self.nnotes().add_notes(
 802 |                 "Basic", [["11", "12"], ["22", "22"]], nid=[10, 10]
 803 |             )
 804 | 
 805 |     def test_new_notes_raises_nguid_clash(self):
 806 |         with self.assertRaises(ValueError):
 807 |             self.nnotes().add_notes(
 808 |                 "Basic", [["11", "12"], ["21", "22"]], nguid=[10, 10]
 809 |             )
 810 |         with self.assertRaises(ValueError):
 811 |             self.nnotes().add_note("Basic", ["11", "12"], nguid=10).add_note(
 812 |                 "Basic", ["21", "22"], nguid=10
 813 |             )
 814 | 
 815 |     def test_new_notes_fields_as_columns(self):
 816 |         empty = self.nenotes()
 817 |         empty.add_notes(
 818 |             "Basic",
 819 |             [["field1", "field2"], ["field21", "field22"]],
 820 |             ntags=[["tag1", "tag2"], ["tag21", "tag22"]],
 821 |             nguid=["cryptic", "cryptic2"],
 822 |             nmod=[124, 1235],
 823 |             nusn=[42, 17],
 824 |             nid=[123, 125],
 825 |             inplace=True,
 826 |         )
 827 | 
 828 |         empty2 = self.nenotes().fields_as_columns()
 829 |         empty2.add_notes(
 830 |             "Basic",
 831 |             [["field1", "field2"], ["field21", "field22"]],
 832 |             ntags=[["tag1", "tag2"], ["tag21", "tag22"]],
 833 |             nguid=["cryptic", "cryptic2"],
 834 |             nmod=[124, 1235],
 835 |             nusn=[42, 17],
 836 |             nid=[123, 125],
 837 |             inplace=True,
 838 |         )
 839 | 
 840 |         self.assertDictEqual(
 841 |             empty.fields_as_columns().to_dict(), empty2.to_dict()
 842 |         )
 843 | 
 844 |     @staticmethod
 845 |     def _notes_dict(notes):
 846 |         return {
 847 |             "nmodel": notes["nmodel"],
 848 |             "nflds": notes["nflds"],
 849 |             "ntags": notes["ntags"],
 850 |             "nguid": notes["nguid"],
 851 |             "nmod": notes["nmod"],
 852 |             "nusn": notes["nusn"],
 853 |         }
 854 | 
 855 |     def test_new_note_empty_fully_specified(self):
 856 |         empty = self.nenotes()
 857 | 
 858 |         init_dict = dict(
 859 |             nmodel="Basic",
 860 |             nflds=["field1", "field2"],
 861 |             ntags=["tag1", "tag2"],
 862 |             nguid="cryptic",
 863 |             nmod=124,
 864 |             nusn=42,
 865 |         )
 866 |         nid = empty.add_note(nid=123, **init_dict, inplace=True)
 867 |         self.assertEqual(nid, 123)
 868 |         note = empty.loc[nid]
 869 |         self.assertDictEqual(init_dict, self._notes_dict(note))
 870 |         self.assertEqual(len(empty), 1)
 871 | 
 872 |         init_dict2 = dict(
 873 |             nmodel="Basic",
 874 |             nflds=["field21", "field22"],
 875 |             ntags=["tag21", "tag22"],
 876 |             nguid="cryptic2",
 877 |             nmod=1235,
 878 |             nusn=17,
 879 |         )
 880 |         nid = empty.add_note(nid=125, **init_dict2, inplace=True)
 881 |         self.assertEqual(nid, 125)
 882 |         note = empty.loc[125]
 883 |         self.assertDictEqual(init_dict2, self._notes_dict(note))
 884 |         self.assertEqual(len(empty), 2)
 885 | 
 886 |         empty2 = self.nenotes()
 887 |         empty2.add_notes(
 888 |             "Basic",
 889 |             [["field1", "field2"], ["field21", "field22"]],
 890 |             ntags=[["tag1", "tag2"], ["tag21", "tag22"]],
 891 |             nguid=["cryptic", "cryptic2"],
 892 |             nmod=[124, 1235],
 893 |             nusn=[42, 17],
 894 |             nid=[123, 125],
 895 |             inplace=True,
 896 |         )
 897 |         self.assertTrue(empty.equals(empty2))
 898 | 
 899 |     def test_new_note_raises_suplicate(self):
 900 |         empty = self.nenotes()
 901 |         empty.add_note("Basic", ["f1", "f2"], nid=10, inplace=True)
 902 |         self.assertEqual(len(empty), 1)
 903 |         with self.assertRaises(ValueError):
 904 |             empty.add_note("Basic", ["f3", "f4"], nid=10, inplace=True)
 905 | 
 906 |     def test_new_note_default_values(self):
 907 |         empty = self.nenotes()
 908 | 
 909 |         init_dict = dict(nmodel="Basic", nflds=["field1", "field2"])
 910 |         nid = empty.add_note(nid=123, **init_dict, inplace=True)
 911 |         self.assertEqual(nid, 123)
 912 |         note = empty.loc[nid].to_dict()
 913 |         self.assertEqual(note["nmodel"], init_dict["nmodel"])
 914 |         self.assertEqual(note["nflds"], init_dict["nflds"])
 915 | 
 916 |     def test_new_note_raises(self):
 917 |         empty = self.nenotes()
 918 |         with self.assertRaises(ValueError):
 919 |             empty.add_note("doesntexist", [])
 920 |         with self.assertRaises(ValueError):
 921 |             empty.add_note("Basic", ["1", "2", "3"])
 922 | 
 923 |     def test_new_notes_equivalent_field_specifications(self):
 924 |         empty1 = self.nenotes()
 925 |         empty2 = self.nenotes()
 926 |         empty3 = self.nenotes()
 927 | 
 928 |         empty1.add_notes("Basic", [["11", "12"], ["21", "22"]], inplace=True)
 929 |         empty2.add_notes(
 930 |             "Basic",
 931 |             [{"Front": "11", "Back": "12"}, {"Front": "21", "Back": "22"}],
 932 |             inplace=True,
 933 |         )
 934 |         empty3.add_notes(
 935 |             "Basic", {"Front": ["11", "21"], "Back": ["12", "22"]}, inplace=True
 936 |         )
 937 |         self.assertListEqual(empty1["nflds"].tolist(), empty2["nflds"].tolist())
 938 |         self.assertListEqual(empty2["nflds"].tolist(), empty3["nflds"].tolist())
 939 | 
 940 |     def test_new_notes_equivalent_field_specifications_fields_as_columns(self):
 941 |         empty1 = self.nenotes().fields_as_columns()
 942 |         empty2 = self.nenotes().fields_as_columns()
 943 |         empty3 = self.nenotes().fields_as_columns()
 944 | 
 945 |         empty1.add_notes("Basic", [["11", "12"], ["21", "22"]], inplace=True)
 946 |         empty2.add_notes(
 947 |             "Basic",
 948 |             [{"Front": "11", "Back": "12"}, {"Front": "21", "Back": "22"}],
 949 |             inplace=True,
 950 |         )
 951 |         empty3.add_notes(
 952 |             "Basic", {"Front": ["11", "21"], "Back": ["12", "22"]}, inplace=True
 953 |         )
 954 | 
 955 |         p = empty1.fields_as_columns_prefix
 956 | 
 957 |         self.assertListEqual(
 958 |             empty1[p + "Front"].tolist(), empty2[p + "Front"].tolist()
 959 |         )
 960 |         self.assertListEqual(
 961 |             empty2[p + "Front"].tolist(), empty3[p + "Front"].tolist()
 962 |         )
 963 |         self.assertListEqual(
 964 |             empty1[p + "Back"].tolist(), empty2[p + "Back"].tolist()
 965 |         )
 966 |         self.assertListEqual(
 967 |             empty2[p + "Back"].tolist(), empty3[p + "Back"].tolist()
 968 |         )
 969 | 
 970 |     # Help
 971 |     # ==========================================================================
 972 | 
 973 |     def test_help_col(self):
 974 |         for table, adf in self.table2adf.items():
 975 |             with self.subTest(table=table):
 976 |                 cols = list(adf.columns) + [adf.index.name]
 977 |                 for col in cols:
 978 |                     self.assertIsInstance(adf.help_col(col, ret=True), str)
 979 | 
 980 |     def test_help_cols_auto(self):
 981 |         for table, adf in self.table2adf.items():
 982 |             with self.subTest(table=table):
 983 |                 df = adf.help_cols()
 984 |                 self.assertListEqual(
 985 |                     list(df.columns),
 986 |                     ["AnkiColumn", "Table", "Description", "Native", "Default"],
 987 |                 )
 988 |                 self.assertListEqual(
 989 |                     sorted(adf.columns),
 990 |                     sorted(set(df.index)),  # nid, cid appear twice
 991 |                 )
 992 | 
 993 |     def test_help(self):
 994 |         notes = self.notes
 995 |         hlp = notes.help(ret=True)
 996 |         self.assertTrue(isinstance(hlp, str))
 997 | 
 998 | 
 999 | class TestAnkiDFv1(TestAnkiDF):
1000 |     db_path = (
1001 |         pathlib.Path(__file__).parent
1002 |         / "data"
1003 |         / "few_basic_cards"
1004 |         / "collection_v1.anki2"
1005 |     )
1006 | 
1007 | 
1008 | if __name__ == "__main__":
1009 |     unittest.main()
1010 | 


--------------------------------------------------------------------------------
/ankipandas/test/test_collection.py:
--------------------------------------------------------------------------------
  1 | # std
  2 | from __future__ import annotations
  3 | 
  4 | import pathlib
  5 | import shutil
  6 | 
  7 | # 3rd
  8 | import pytest
  9 | 
 10 | # ours
 11 | from ankipandas.collection import Collection
 12 | from ankipandas.test.util import parameterized_paths
 13 | 
 14 | 
 15 | def _init_all_tables(col: Collection) -> None:
 16 |     """Access all attributes at least once to ensure that they are
 17 |     initialized.
 18 |     """
 19 |     _ = col.notes
 20 |     _ = col.cards
 21 |     _ = col.revs
 22 | 
 23 | 
 24 | # Summarize changes
 25 | # ==========================================================================
 26 | 
 27 | 
 28 | @parameterized_paths()
 29 | def test_summarize_changes_uninitialized(db_path):
 30 |     col = Collection(db_path)
 31 |     sc = col.summarize_changes(output="dict")
 32 |     assert len(sc) == 0
 33 | 
 34 | 
 35 | @parameterized_paths()
 36 | def test_summarize_changes_no_changes(db_path):
 37 |     col = Collection(db_path)
 38 |     _init_all_tables(col)
 39 |     col.summarize_changes()
 40 |     sc = col.summarize_changes(output="dict")
 41 |     for item in ["cards", "revs", "notes"]:
 42 |         assert sc[item]["n_modified"] == 0
 43 |         assert sc[item]["n_added"] == 0
 44 |         assert sc[item]["n_deleted"] == 0
 45 |         assert not sc[item]["has_changed"]
 46 | 
 47 | 
 48 | @parameterized_paths()
 49 | def test_summarize_notes_changed(db_path):
 50 |     col = Collection(db_path)
 51 |     col.notes.add_tag("this_will_be_modified", inplace=True)
 52 |     sc = col.summarize_changes(output="dict")
 53 |     assert sc["notes"]["n_modified"] == sc["notes"]["n"]
 54 | 
 55 | 
 56 | # Writing
 57 | # ==========================================================================
 58 | 
 59 | 
 60 | @parameterized_paths()
 61 | def test_read_write_identical_trivial(db_path, tmpdir):
 62 |     db_path = shutil.copy2(str(db_path), str(tmpdir))
 63 |     (pathlib.Path(str(tmpdir)) / "backups").mkdir()
 64 |     col = Collection(db_path)
 65 |     _init_all_tables(col)
 66 |     col.write(modify=True, delete=True, add=True, _override_exception=True)
 67 |     col_rel = Collection(db_path)
 68 |     assert col.notes.equals(col_rel.notes)
 69 |     assert col.cards.equals(col_rel.cards)
 70 |     assert col.revs.equals(col_rel.revs)
 71 | 
 72 | 
 73 | @parameterized_paths()
 74 | def test_write_raises_delete(db_path, tmpdir):
 75 |     db_path = shutil.copy2(str(db_path), str(tmpdir))
 76 |     (pathlib.Path(str(tmpdir)) / "backups").mkdir()
 77 |     col = Collection(db_path)
 78 |     _init_all_tables(col)
 79 |     col.notes.drop(col.notes.index, inplace=True)
 80 |     cases = [
 81 |         dict(modify=False, add=True),
 82 |         dict(modify=True, add=False),
 83 |         dict(modify=True, add=True),
 84 |     ]
 85 |     for case in cases:
 86 |         with pytest.raises(ValueError, match=".*would be deleted.*"):
 87 |             col.write(**case, delete=False, _override_exception=True)
 88 | 
 89 | 
 90 | @parameterized_paths()
 91 | def test_write_raises_modified(db_path, tmpdir):
 92 |     db_path = shutil.copy2(str(db_path), str(tmpdir))
 93 |     (pathlib.Path(str(tmpdir)) / "backups").mkdir()
 94 |     col = Collection(db_path)
 95 |     _init_all_tables(col)
 96 |     col.notes.add_tag("test", inplace=True)
 97 |     cases = [
 98 |         dict(add=False, delete=True),
 99 |         dict(add=True, delete=False),
100 |         dict(add=True, delete=True),
101 |     ]
102 |     for case in cases:
103 |         with pytest.raises(ValueError, match=".*would be modified.*"):
104 |             col.write(**case, modify=False, _override_exception=True)
105 | 
106 | 
107 | @parameterized_paths()
108 | def test_write_raises_added(db_path, tmpdir):
109 |     db_path = shutil.copy2(str(db_path), str(tmpdir))
110 |     (pathlib.Path(str(tmpdir)) / "backups").mkdir()
111 |     col = Collection(db_path)
112 |     _init_all_tables(col)
113 |     col.notes.add_note("Basic", ["test", "back"], inplace=True)
114 |     cases = [
115 |         dict(modify=False, delete=True),
116 |         dict(modify=True, delete=False),
117 |         dict(modify=True, delete=True),
118 |     ]
119 |     for case in cases:
120 |         with pytest.raises(ValueError, match=".*would be modified.*"):
121 |             col.write(**case, add=False, _override_exception=True)
122 | 
123 | 
124 | @parameterized_paths()
125 | def test_write_added(db_path, tmpdir):
126 |     db_path = shutil.copy2(str(db_path), str(tmpdir))
127 |     (pathlib.Path(str(tmpdir)) / "backups").mkdir()
128 |     col = Collection(db_path)
129 |     _init_all_tables(col)
130 |     col.notes.add_note("Basic", ["test", "back"], inplace=True)
131 |     col.write(add=True, _override_exception=True)
132 | 


--------------------------------------------------------------------------------
/ankipandas/test/test_paths.py:
--------------------------------------------------------------------------------
  1 | # std
  2 | from __future__ import annotations
  3 | 
  4 | import collections
  5 | import tempfile
  6 | import unittest
  7 | from pathlib import Path
  8 | 
  9 | # 3rd
 10 | from randomfiletree import iterative_gaussian_tree, sample_random_elements
 11 | 
 12 | # ours
 13 | import ankipandas.paths as paths
 14 | from ankipandas.util.log import set_debug_log_level
 15 | from ankipandas.util.misc import flatten_list_list
 16 | 
 17 | 
 18 | def touch_file_in_random_folders(basedir, filename: str, n=1) -> list[Path]:
 19 |     """Create files in random folders.
 20 | 
 21 |     Args:
 22 |         basedir: Starting directory
 23 |         filename: Filename of the files to create
 24 |         n: Number of files to create
 25 | 
 26 |     Returns:
 27 |         List of files that were created.
 28 |     """
 29 |     files = set()
 30 |     for d in sample_random_elements(
 31 |         basedir, n_dirs=n, n_files=0, onfail="ignore"
 32 |     )[0]:
 33 |         p = Path(d) / filename
 34 |         p.touch()
 35 |         files.add(p)
 36 |     return list(files)
 37 | 
 38 | 
 39 | class TestFindDatabase(unittest.TestCase):
 40 |     def setUp(self):
 41 |         set_debug_log_level()
 42 |         self.dirs = {
 43 |             "nothing": tempfile.TemporaryDirectory(),
 44 |             "multiple": tempfile.TemporaryDirectory(),
 45 |             "perfect": tempfile.TemporaryDirectory(),
 46 |         }
 47 |         for d in self.dirs.values():
 48 |             iterative_gaussian_tree(
 49 |                 d.name,
 50 |                 repeat=5,
 51 |                 nfolders=3,
 52 |                 min_folders=1,
 53 |                 nfiles=2,
 54 |                 min_files=1,
 55 |                 maxdepth=4,
 56 |             )
 57 |         self.dbs = {
 58 |             "nothing": [],
 59 |             "multiple": touch_file_in_random_folders(
 60 |                 self.dirs["multiple"].name, "collection.anki2", 10
 61 |             ),
 62 |             "perfect": touch_file_in_random_folders(
 63 |                 self.dirs["perfect"].name, "collection.anki2", 1
 64 |             ),
 65 |         }
 66 |         self.maxDiff = None
 67 | 
 68 |     def test_db_path_input_nexist(self):
 69 |         with self.assertRaises(FileNotFoundError):
 70 |             paths.db_path_input("/x/y/z")
 71 | 
 72 |     def test_db_path_input_multiple(self):
 73 |         with self.assertRaises(ValueError):
 74 |             paths.db_path_input(self.dirs["multiple"].name)
 75 | 
 76 |     def test_db_path_input_nothing(self):
 77 |         with self.assertRaises(ValueError):
 78 |             paths.db_path_input(self.dirs["nothing"].name)
 79 | 
 80 |     def test_db_path_input_perfect(self):
 81 |         self.assertEqual(
 82 |             paths.db_path_input(self.dirs["perfect"].name),
 83 |             self.dbs["perfect"][0],
 84 |         )
 85 | 
 86 |     def test__find_database(self):
 87 |         for d in self.dirs:
 88 |             a = sorted(
 89 |                 map(
 90 |                     str,
 91 |                     flatten_list_list(
 92 |                         paths._find_db(
 93 |                             self.dirs[d].name,
 94 |                             maxdepth=None,
 95 |                             break_on_first=False,
 96 |                         ).values()
 97 |                     ),
 98 |                 )
 99 |             )
100 |             b = sorted(str(x) for x in self.dbs[d])
101 |             self.assertListEqual(a, b)
102 | 
103 |     def test__find_database_filename(self):
104 |         # If doesn't exist
105 |         self.assertEqual(
106 |             paths._find_db(
107 |                 Path("abc/myfilename.txt"), filename="myfilename.txt"
108 |             ),
109 |             {},
110 |         )
111 |         tmpdir = tempfile.TemporaryDirectory()
112 |         dir_path = Path(tmpdir.name) / "myfolder"
113 |         file_path = dir_path / "myfilename.txt"
114 |         dir_path.mkdir()
115 |         file_path.touch()
116 |         self.assertEqual(
117 |             paths._find_db(file_path, filename="myfilename.txt"),
118 |             collections.defaultdict(list, {"myfolder": [file_path]}),
119 |         )
120 |         tmpdir.cleanup()
121 | 
122 |     def test_find_database(self):
123 |         with self.assertRaises(ValueError):
124 |             paths.find_db(self.dirs["nothing"].name, break_on_first=False)
125 |         with self.assertRaises(ValueError):
126 |             paths.find_db(self.dirs["multiple"].name, break_on_first=False)
127 |             print(self.dbs["multiple"])
128 |         self.assertEqual(
129 |             str(paths.find_db(self.dirs["perfect"].name, break_on_first=False)),
130 |             str(self.dbs["perfect"][0]),
131 |         )
132 | 
133 |     def tearDown(self):
134 |         for d in self.dirs.values():
135 |             d.cleanup()
136 | 
137 | 
138 | class TestBackup(unittest.TestCase):
139 |     def setUp(self):
140 |         set_debug_log_level()
141 |         self.tmpdir = tempfile.TemporaryDirectory()
142 |         self.tmpdir_path = Path(self.tmpdir.name)
143 |         (self.tmpdir_path / "collection.anki2").touch()
144 |         (self.tmpdir_path / "backups").mkdir()
145 | 
146 |         self.tmpdir_only_db = tempfile.TemporaryDirectory()
147 |         self.tmpdir_only_db_path = Path(self.tmpdir_only_db.name)
148 |         (self.tmpdir_only_db_path / "collection.anki2").touch()
149 | 
150 |     def tearDown(self):
151 |         self.tmpdir.cleanup()
152 |         self.tmpdir_only_db.cleanup()
153 | 
154 |     def test_get_anki_backup_folder(self):
155 |         self.assertEqual(
156 |             str(
157 |                 paths.get_anki_backup_folder(
158 |                     self.tmpdir_path / "collection.anki2"
159 |                 )
160 |             ),
161 |             str(self.tmpdir_path / "backups"),
162 |         )
163 | 
164 |     def test_get_anki_backup_folder_raise(self):
165 |         with self.assertRaises(FileNotFoundError):
166 |             paths.get_anki_backup_folder(self.tmpdir_path / "asdf")
167 |         with self.assertRaises(ValueError):
168 |             paths.get_anki_backup_folder(
169 |                 self.tmpdir_only_db_path / "collection.anki2"
170 |             )
171 |         paths.get_anki_backup_folder(
172 |             self.tmpdir_only_db_path / "collection.anki2", nexist="ignore"
173 |         )
174 | 
175 |     def test_backup_db_auto(self):
176 |         with tempfile.TemporaryDirectory() as tmpdir:
177 |             db_path = Path(tmpdir) / "collection.anki2"
178 |             db_path.touch()
179 |             backup_folder = db_path.parent / "backups"
180 |             backup_folder.mkdir()
181 |             backup_path = paths.backup_db(db_path)
182 |             self.assertTrue(backup_path.is_file())
183 |             self.assertTrue(backup_path.parent == backup_folder)
184 | 
185 |     def test_backup_db_custom(self):
186 |         with tempfile.TemporaryDirectory() as tmpdir:
187 |             db_path = Path(tmpdir) / "collection.anki2"
188 |             db_path.touch()
189 |             backup_folder = db_path.parent / "myfolder"
190 |             backup_path = paths.backup_db(db_path, backup_folder=backup_folder)
191 |             self.assertTrue(backup_path.is_file())
192 |             self.assertTrue(backup_path.parent == backup_folder)
193 | 
194 | 
195 | if __name__ == "__main__":
196 |     unittest.main()
197 | 


--------------------------------------------------------------------------------
/ankipandas/test/test_raw.py:
--------------------------------------------------------------------------------
  1 | # std
  2 | from __future__ import annotations
  3 | 
  4 | import copy
  5 | import pathlib
  6 | import shutil
  7 | import tempfile
  8 | import unittest
  9 | 
 10 | # 3rd
 11 | import pandas as pd
 12 | 
 13 | # ours
 14 | from ankipandas.raw import (
 15 |     close_db,
 16 |     get_db_version,
 17 |     get_deck_info,
 18 |     get_did2deck,
 19 |     get_info,
 20 |     get_mid2fields,
 21 |     get_mid2model,
 22 |     get_model_info,
 23 |     get_table,
 24 |     load_db,
 25 |     set_info,
 26 |     set_table,
 27 | )
 28 | from ankipandas.util.dataframe import merge_dfs
 29 | from ankipandas.util.log import set_debug_log_level
 30 | 
 31 | 
 32 | class TestRawRead(unittest.TestCase):
 33 |     def setUp(self):
 34 |         set_debug_log_level()
 35 |         self.db_folder = (
 36 |             pathlib.Path(__file__).parent / "data" / "few_basic_cards"
 37 |         )
 38 |         self.version2db = {
 39 |             0: load_db(self.db_folder / "collection.anki2"),
 40 |             1: load_db(self.db_folder / "collection_v1.anki2"),
 41 |         }
 42 | 
 43 |     def test_get_db_version(self):
 44 |         for version in [0, 1]:
 45 |             with self.subTest(version=version):
 46 |                 assert get_db_version(self.version2db[version]) == version
 47 | 
 48 |     def tearDown(self):
 49 |         for db in self.version2db.values():
 50 |             close_db(db)
 51 | 
 52 |     def test_get_deck_info(self):
 53 |         for version in [0, 1]:
 54 |             with self.subTest(version=version):
 55 |                 info = get_deck_info(self.version2db[version])
 56 |                 self.assertGreaterEqual(len(info), 2)
 57 |                 self.assertIsInstance(info, dict)
 58 | 
 59 |     def test_get_deck_names(self):
 60 |         for version in [0, 1]:
 61 |             with self.subTest(version=version):
 62 |                 names = get_did2deck(self.version2db[version])
 63 |                 self.assertTrue(
 64 |                     set(names.values()).issuperset({"Testing", "EnglishGerman"})
 65 |                 )
 66 | 
 67 |     def test_get_model_info(self):
 68 |         for version in [0, 1]:
 69 |             with self.subTest(version=version):
 70 |                 minfo = get_model_info(self.version2db[version])
 71 |                 self.assertIsInstance(minfo, dict)
 72 |                 self.assertGreaterEqual(len(minfo), 2)
 73 | 
 74 |     def test_get_model_names(self):
 75 |         for version in [0, 1]:
 76 |             with self.subTest(version=version):
 77 |                 names = get_mid2model(self.version2db[version])
 78 |                 self.assertIn("Basic", names.values())
 79 |                 self.assertIn("Cloze", names.values())
 80 |                 self.assertEqual(len(names), 5)
 81 | 
 82 |     def test_get_field_names(self):
 83 |         for version in [0, 1]:
 84 |             with self.subTest(version=version):
 85 |                 _fnames = get_mid2fields(self.version2db[version])
 86 |                 models = get_mid2model(self.version2db[version])
 87 |                 fnames = {models[mid]: _fnames[mid] for mid in models}
 88 |                 print("MODELS", models)
 89 |                 print("_FNAMES", _fnames)
 90 |                 print("FNAMES", fnames)
 91 |                 self.assertEqual(
 92 |                     len(fnames), len(get_mid2model(self.version2db[version]))
 93 |                 )
 94 |                 self.assertListEqual(fnames["Basic"], ["Front", "Back"])
 95 | 
 96 | 
 97 | class TestRawWrite(unittest.TestCase):
 98 |     db_read_path = (
 99 |         pathlib.Path(__file__).parent
100 |         / "data"
101 |         / "few_basic_cards"
102 |         / "collection.anki2"
103 |     )
104 | 
105 |     def setUp(self):
106 |         set_debug_log_level()
107 |         self.db_read = load_db(self.db_read_path)
108 |         self.db_write_dir = tempfile.TemporaryDirectory()
109 |         self.db_write_path = (
110 |             pathlib.Path(self.db_write_dir.name) / "collection.anki2"
111 |         )
112 |         self._reset()
113 | 
114 |     def _reset(self):
115 |         shutil.copy(str(self.db_read_path), str(self.db_write_path))
116 |         self.db_write = load_db(self.db_write_path)
117 | 
118 |     def tearDown(self):
119 |         self.db_read.close()
120 |         self.db_write.close()
121 |         self.db_write_dir.cleanup()
122 | 
123 |     def _check_db_equal(self):
124 |         notes = get_table(self.db_read, "notes")
125 |         cards = get_table(self.db_read, "cards")
126 |         revlog = get_table(self.db_read, "revs")
127 |         notes2 = get_table(self.db_write, "notes")
128 |         cards2 = get_table(self.db_write, "cards")
129 |         revlog2 = get_table(self.db_write, "revs")
130 |         # noinspection PyUnresolvedReferences
131 |         self.assertListEqual(notes.values.tolist(), notes2.values.tolist())
132 |         # noinspection PyUnresolvedReferences
133 |         self.assertListEqual(cards.values.tolist(), cards2.values.tolist())
134 |         # noinspection PyUnresolvedReferences
135 |         self.assertListEqual(revlog.values.tolist(), revlog2.values.tolist())
136 | 
137 |     def test_rw_identical(self):
138 |         notes = get_table(self.db_read, "notes")
139 |         cards = get_table(self.db_read, "cards")
140 |         revlog = get_table(self.db_read, "revs")
141 |         for mode in ["update", "replace", "append"]:
142 |             with self.subTest(mode=mode):
143 |                 self._reset()
144 |                 set_table(self.db_write, notes, "notes", mode)
145 |                 set_table(self.db_write, cards, "cards", mode)
146 |                 set_table(self.db_write, revlog, "revs", mode)
147 |                 self._check_db_equal()
148 | 
149 |     def test_update(self):
150 |         notes2 = get_table(self.db_read, "notes")
151 |         notes = get_table(self.db_read, "notes")
152 |         for mode in ["update", "replace", "append"]:
153 |             with self.subTest(mode=mode):
154 |                 self._reset()
155 |                 notes2.loc[notes2["id"] == 1555579337683, "tags"] = (
156 |                     "definitelynew!"
157 |                 )
158 |                 set_table(self.db_write, notes2, "notes", mode)
159 |                 if mode == "append":
160 |                     self._check_db_equal()
161 |                 else:
162 |                     notes2r = get_table(self.db_write, "notes")
163 |                     chtag = notes2r.loc[notes2r["id"] == 1555579337683, "tags"]
164 |                     self.assertListEqual(
165 |                         list(chtag.values.tolist()), ["definitelynew!"]
166 |                     )
167 |                     unchanged = notes.loc[notes["id"] != 1555579337683, :]
168 |                     unchanged2 = notes2r.loc[notes2["id"] != 1555579337683, :]
169 | 
170 |                     self.assertListEqual(
171 |                         list(unchanged.values.tolist()),
172 |                         list(unchanged2.values.tolist()),
173 |                     )
174 | 
175 |     def test_update_append_does_not_delete(self):
176 |         notes = get_table(self.db_read, "notes")
177 |         cards = get_table(self.db_read, "cards")
178 |         revs = get_table(self.db_read, "revs")
179 |         notes.drop(notes.index)
180 |         cards.drop(cards.index)
181 |         revs.drop(revs.index)
182 |         for mode in ["update", "append"]:
183 |             with self.subTest(mode=mode):
184 |                 self._reset()
185 |                 set_table(self.db_write, notes, "notes", mode)
186 |                 set_table(self.db_write, cards, "cards", mode)
187 |                 set_table(self.db_write, revs, "revs", mode)
188 |                 self._check_db_equal()
189 | 
190 |     def test_replace_deletes(self):
191 |         notes = get_table(self.db_read, "notes")
192 |         cards = get_table(self.db_read, "cards")
193 |         revs = get_table(self.db_read, "revs")
194 |         notes = notes.drop(notes.index)
195 |         cards = cards.drop(cards.index)
196 |         revs = revs.drop(revs.index)
197 |         self._reset()
198 |         set_table(self.db_write, notes, "notes", "replace")
199 |         set_table(self.db_write, cards, "cards", "replace")
200 |         set_table(self.db_write, revs, "revs", "replace")
201 |         notes = get_table(self.db_write, "notes")
202 |         cards = get_table(self.db_write, "cards")
203 |         revs = get_table(self.db_write, "revs")
204 |         self.assertEqual(len(notes), 0)
205 |         self.assertEqual(len(revs), 0)
206 |         self.assertEqual(len(cards), 0)
207 | 
208 |     def test_set_get_inverse(self):
209 |         info = get_info(self.db_read)
210 |         set_info(self.db_write, info)
211 |         info2 = get_info(self.db_write)
212 |         self.assertDictEqual(info, info2)
213 | 
214 | 
215 | class TestRawWriteV1(unittest.TestCase):
216 |     db_read_path = (
217 |         pathlib.Path(__file__).parent
218 |         / "data"
219 |         / "few_basic_cards"
220 |         / "collection_v1.anki2"
221 |     )
222 | 
223 | 
224 | class TestMergeDfs(unittest.TestCase):
225 |     def setUp(self):
226 |         set_debug_log_level()
227 |         self.df = pd.DataFrame(
228 |             {"id_df": [1, 2, 3, 1, 1], "clash": ["a", "b", "c", "a", "a"]}
229 |         )
230 |         self.df_add = pd.DataFrame(
231 |             {
232 |                 "id_add": [1, 2, 3],
233 |                 "value": [4, 5, 6],
234 |                 "drop": [7, 8, 9],
235 |                 "ignore": [10, 11, 12],
236 |                 "clash": [1, 1, 1],
237 |             }
238 |         )
239 | 
240 |     def test_merge_dfs(self):
241 |         df_merged = merge_dfs(
242 |             self.df,
243 |             self.df_add,
244 |             id_df="id_df",
245 |             id_add="id_add",
246 |             prepend="_",
247 |             columns=["value", "drop", "clash"],
248 |             drop_columns=["id_add", "drop"],
249 |         )
250 |         self.assertListEqual(
251 |             sorted(df_merged.columns),
252 |             ["_clash", "clash", "id_df", "value"],
253 |         )
254 |         self.assertListEqual(sorted(df_merged["value"]), [4, 4, 4, 5, 6])
255 | 
256 |     def test_merge_dfs_prepend_all(self):
257 |         df_merged = merge_dfs(
258 |             self.df,
259 |             self.df_add,
260 |             id_df="id_df",
261 |             id_add="id_add",
262 |             prepend="_",
263 |             prepend_clash_only=False,
264 |         )
265 |         self.assertListEqual(
266 |             sorted(df_merged.columns),
267 |             ["_clash", "_drop", "_ignore", "_value", "clash", "id_df"],
268 |         )
269 | 
270 |     def test_merge_dfs_inplace(self):
271 |         df = copy.deepcopy(self.df)
272 |         merge_dfs(df, self.df_add, id_df="id_df", id_add="id_add", inplace=True)
273 |         self.assertListEqual(
274 |             sorted(df.columns),
275 |             ["clash_x", "clash_y", "drop", "id_df", "ignore", "value"],
276 |         )
277 |         self.assertListEqual(sorted(df["value"]), [4, 4, 4, 5, 6])
278 | 
279 | 
280 | if __name__ == "__main__":
281 |     unittest.main()
282 | 


--------------------------------------------------------------------------------
/ankipandas/test/test_regression.py:
--------------------------------------------------------------------------------
 1 | """ These tests are created from issues that we fixed to avoid that they might
 2 | come back later.
 3 | """
 4 | 
 5 | from __future__ import annotations
 6 | 
 7 | # ours
 8 | from ankipandas.collection import Collection
 9 | from ankipandas.test.util import parameterized_paths
10 | 
11 | 
12 | @parameterized_paths()
13 | def test_inplace_merge_notes(db_path):
14 |     """https://github.com/klieret/AnkiPandas/issues/51
15 |     AttributeError: 'NoneType' object has no attribute 'col'
16 |     """
17 |     col = Collection(db_path)
18 |     col.cards.merge_notes(inplace=True)
19 | 


--------------------------------------------------------------------------------
/ankipandas/test/util.py:
--------------------------------------------------------------------------------
 1 | # std
 2 | from __future__ import annotations
 3 | 
 4 | import pathlib
 5 | 
 6 | # 3rd
 7 | import pytest
 8 | 
 9 | _test_db_paths = [
10 |     pathlib.Path(__file__).resolve().parent
11 |     / "data"
12 |     / "few_basic_cards"
13 |     / "collection.anki2",
14 |     pathlib.Path(__file__).resolve().parent
15 |     / "data"
16 |     / "few_basic_cards"
17 |     / "collection_v1.anki2",
18 | ]
19 | 
20 | 
21 | def parameterized_paths():
22 |     return pytest.mark.parametrize("db_path", _test_db_paths)
23 | 


--------------------------------------------------------------------------------
/ankipandas/util/__init__.py:
--------------------------------------------------------------------------------
 1 | """ Various utilities of this package.
 2 | 
 3 | .. warning::
 4 | 
 5 |     These utilities are less aimed at end users and might therefore be subject
 6 |     to change.
 7 | 
 8 | """
 9 | 
10 | from __future__ import annotations
11 | 
12 | import ankipandas.util.checksum
13 | import ankipandas.util.dataframe
14 | import ankipandas.util.log
15 | import ankipandas.util.misc
16 | 


--------------------------------------------------------------------------------
/ankipandas/util/checksum.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import re
 4 | from hashlib import sha1
 5 | from html.entities import name2codepoint
 6 | 
 7 | # Implementation directly copied from Anki (anki/anki/utils.py).
 8 | # Only a bit of PEP8ing and making things private.
 9 | 
10 | _reComment = re.compile("(?s)<!--.*?-->")
11 | _reStyle = re.compile("(?si)<style.*?>.*?</style>")
12 | _reScript = re.compile("(?si)<script.*?>.*?</script>")
13 | _reTag = re.compile("(?s)<.*?>")
14 | _reEnts = re.compile(r"&#?\w+;")
15 | _reMedia = re.compile("(?i)<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>")
16 | 
17 | 
18 | def _strip_html(s):
19 |     s = _reComment.sub("", s)
20 |     s = _reStyle.sub("", s)
21 |     s = _reScript.sub("", s)
22 |     s = _reTag.sub("", s)
23 |     s = _ents_to_txt(s)
24 |     return s
25 | 
26 | 
27 | def _strip_html_media(s):
28 |     """Strip HTML but keep media filenames"""
29 |     s = _reMedia.sub(" \\1 ", s)
30 |     return _strip_html(s)
31 | 
32 | 
33 | def _ents_to_txt(html):
34 |     # entitydefs defines nbsp as \xa0 instead of a standard space, so we
35 |     # replace it first
36 |     html = html.replace("&nbsp;", " ")
37 | 
38 |     def fixup(m):
39 |         text = m.group(0)
40 |         if text[:2] == "&#":
41 |             # character reference
42 |             try:
43 |                 if text[:3] == "&#x":
44 |                     return chr(int(text[3:-1], 16))
45 |                 else:
46 |                     return chr(int(text[2:-1]))
47 |             except ValueError:
48 |                 pass
49 |         else:
50 |             # named entity
51 |             try:
52 |                 text = chr(name2codepoint[text[1:-1]])
53 |             except KeyError:
54 |                 pass
55 |         return text  # leave as is
56 | 
57 |     return _reEnts.sub(fixup, html)
58 | 
59 | 
60 | def _checksum(data):
61 |     if isinstance(data, str):
62 |         data = data.encode("utf-8")
63 |     return sha1(data).hexdigest()
64 | 
65 | 
66 | def field_checksum(data: str) -> int:
67 |     """32 bit unsigned number from first 8 digits of sha1 hash.
68 |     Apply this to the first field to the the field checksum that is used by
69 |     Anki to detect duplicates.
70 | 
71 |     Args:
72 |         data: string like
73 | 
74 |     Returns:
75 |         int
76 |     """
77 |     return int(_checksum(_strip_html_media(data).encode("utf-8"))[:8], 16)
78 | 


--------------------------------------------------------------------------------
/ankipandas/util/dataframe.py:
--------------------------------------------------------------------------------
  1 | """ DataFrame utilities. """
  2 | 
  3 | # std
  4 | from __future__ import annotations
  5 | 
  6 | # 3rd
  7 | import pandas as pd
  8 | 
  9 | # ours
 10 | from ankipandas.util.log import log
 11 | 
 12 | 
 13 | def _sync_metadata(df_ret: pd.DataFrame, df_old: pd.DataFrame) -> None:
 14 |     """
 15 |     If the df_old has a `_metadata` field, containing a list of attribute
 16 |     names that contain metadata, then this is copied from `df_old` to the new
 17 |     dataframe `df_ret.
 18 | 
 19 |     Args:
 20 |         df_ret:
 21 |         df_old:
 22 | 
 23 |     Returns:
 24 |         None
 25 |     """
 26 |     if hasattr(df_old, "_metadata"):
 27 |         for key in df_old._metadata:
 28 |             value = getattr(df_old, key)
 29 |             log.debug("Setting metadata attribute %s to %s", key, value)
 30 |             setattr(df_ret, key, value)
 31 | 
 32 | 
 33 | def replace_df_inplace(df: pd.DataFrame, df_new: pd.DataFrame) -> None:
 34 |     """Replace dataframe 'in place'.
 35 |     If the dataframe has a `_metadata` field, containing a list of attribute
 36 |     names that contain metadata, then this is copied from `df` to the new
 37 |     dataframe.
 38 | 
 39 |     Args:
 40 |         df: :class:`pandas.DataFrame` to be replaced
 41 |         df_new: :class:`pandas.DataFrame` to replace the previous one
 42 | 
 43 |     Returns:
 44 |         None
 45 |     """
 46 |     # Drop all ROWs (not columns)
 47 |     if df.index.any():
 48 |         df.drop(df.index, inplace=True)
 49 |     for col in df_new.columns:
 50 |         df[col] = df_new[col]
 51 |     drop_cols = set(df.columns) - set(df_new.columns)
 52 |     if drop_cols:
 53 |         df.drop(drop_cols, axis=1, inplace=True)
 54 |     _sync_metadata(df_new, df)
 55 | 
 56 | 
 57 | # todo: this might be made more elegant in the future for sure...
 58 | # fixme: This removes items whenever it can't merge!
 59 | def merge_dfs(
 60 |     df: pd.DataFrame,
 61 |     df_add: pd.DataFrame,
 62 |     id_df: str,
 63 |     inplace=False,
 64 |     id_add="id",
 65 |     prepend="",
 66 |     replace=False,
 67 |     prepend_clash_only=True,
 68 |     columns=None,
 69 |     drop_columns=None,
 70 | ) -> pd.DataFrame | None:
 71 |     """
 72 |     Merge information from two dataframes.
 73 |     If the dataframe has a `_metadata` field, containing a list of attribute
 74 |     names that contain metadata, then this is copied from `df` to the new
 75 |     dataframe.
 76 | 
 77 |     Args:
 78 |         df: Original :class:`pandas.DataFrame`
 79 |         df_add: :class:`pandas.DataFrame` to be merged with original
 80 |             :class:`pandas.DataFrame`
 81 |         id_df: Column of original dataframe that contains the id along which
 82 |             we merge.
 83 |         inplace: If False, return new dataframe, else update old one
 84 |         id_add: Column of the new dataframe that contains the id along which
 85 |             we merge
 86 |         prepend: Prepend a string to the column names from the new dataframe
 87 |         replace: Replace columns
 88 |         prepend_clash_only: Only prepend string to the column names from the
 89 |             new dataframe if there is a name clash.
 90 |         columns: Keep only these columns
 91 |         drop_columns: Drop these columns
 92 | 
 93 |     Returns:
 94 |         New merged :class:`pandas.DataFrame`
 95 |     """
 96 |     # Careful: Do not drop the id column until later (else we can't merge)
 97 |     # Still, we want to remove as much as possible here, because it's probably
 98 |     # better performing
 99 |     if columns:
100 |         df_add = df_add.drop(
101 |             set(df_add.columns) - (set(columns) | {id_add}), axis=1
102 |         )
103 |     if drop_columns:
104 |         df_add = df_add.drop(set(drop_columns) - {id_add}, axis=1)
105 |     # Careful: Rename columns after dropping unwanted ones
106 |     if prepend_clash_only:
107 |         col_clash = set(df.columns) & set(df_add.columns)
108 |         rename_dict = {col: prepend + col for col in col_clash}
109 |     else:
110 |         rename_dict = {col: prepend + col for col in df_add.columns}
111 |     df_add = df_add.rename(columns=rename_dict)
112 |     # Careful: Might have renamed id_add as well
113 |     if id_add in rename_dict:
114 |         id_add = rename_dict[id_add]
115 | 
116 |     if replace:
117 |         # Simply remove all potential clashes
118 |         replaced_columns = set(df_add.columns) & set(df.columns)
119 |         df = df.drop(replaced_columns, axis=1)
120 | 
121 |     merge_kwargs = {}
122 | 
123 |     if id_add in df_add.columns:
124 |         merge_kwargs["right_on"] = id_add
125 |     elif id_add == df_add.index.name:
126 |         merge_kwargs["right_index"] = True
127 |     else:
128 |         raise ValueError(f"'{id_add}' is neither index nor column.")
129 | 
130 |     if id_df in df.columns:
131 |         merge_kwargs["left_on"] = id_df
132 |     elif id_df == df.index.name:
133 |         merge_kwargs["left_index"] = True
134 |     else:
135 |         raise ValueError(f"'{id_df}' is neither index nor column.")
136 | 
137 |     df_merge = df.merge(df_add, **merge_kwargs)
138 | 
139 |     # Now remove id_add if it was to be removed
140 |     # Careful: 'in' doesn't work with None
141 |     if (columns and id_add not in columns) or (
142 |         drop_columns and id_add in drop_columns
143 |     ):
144 |         df_merge.drop(id_add, axis=1, inplace=True)
145 | 
146 |     # Make sure we don't have two ID columns
147 |     new_id_add_col = id_add
148 |     if id_add in rename_dict:
149 |         new_id_add_col = rename_dict[id_add]
150 |     if new_id_add_col in df_merge.columns and id_df != new_id_add_col:
151 |         df_merge.drop(new_id_add_col, axis=1, inplace=True)
152 | 
153 |     _sync_metadata(df_merge, df)
154 | 
155 |     if inplace:
156 |         replace_df_inplace(df, df_merge)
157 |         return None  # mypy
158 |     else:
159 |         return df_merge
160 | 


--------------------------------------------------------------------------------
/ankipandas/util/guid.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import random
 4 | import string
 5 | 
 6 | # Directly copied from anki utils!
 7 | 
 8 | 
 9 | # used in ankiweb
10 | def _base62(num, extra=""):
11 |     s = string
12 |     table = s.ascii_letters + s.digits + extra
13 |     buf = ""
14 |     while num:
15 |         num, i = divmod(num, len(table))
16 |         buf = table[i] + buf
17 |     return buf
18 | 
19 | 
20 | _base91_extra_chars = "!#$%&()*+,-./:;<=>?@[]^_`{|}~"
21 | 
22 | 
23 | def _base91(num):
24 |     # all printable characters minus quotes, backslash and separators
25 |     return _base62(num, _base91_extra_chars)
26 | 
27 | 
28 | def _guid64():
29 |     """Return a base91-encoded 64bit random number."""
30 |     return _base91(random.randint(0, 2**64 - 1))
31 | 
32 | 
33 | def guid():
34 |     """Return globally unique ID"""
35 |     return _guid64()
36 | 


--------------------------------------------------------------------------------
/ankipandas/util/log.py:
--------------------------------------------------------------------------------
 1 | # std
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | 
 6 | import colorlog
 7 | 
 8 | LOG_DEFAULT_LEVEL = logging.INFO
 9 | 
10 | 
11 | def get_logger():
12 |     """Sets up global logger."""
13 |     _log = colorlog.getLogger("AnkiPandas")
14 | 
15 |     if _log.handlers:
16 |         # the logger already has handlers attached to it, even though
17 |         # we didn't add it ==> logging.get_logger got us an existing
18 |         # logger ==> we don't need to do anything
19 |         return _log
20 | 
21 |     _log.setLevel(LOG_DEFAULT_LEVEL)
22 | 
23 |     sh = colorlog.StreamHandler()
24 |     log_colors = {
25 |         "DEBUG": "cyan",
26 |         "INFO": "green",
27 |         "WARNING": "yellow",
28 |         "ERROR": "red",
29 |         "CRITICAL": "red",
30 |     }
31 |     formatter = colorlog.ColoredFormatter(
32 |         "%(log_color)s%(levelname)s: %(message)s", log_colors=log_colors
33 |     )
34 |     sh.setFormatter(formatter)
35 |     # Controlled by overall logger level
36 |     sh.setLevel(logging.DEBUG)
37 | 
38 |     _log.addHandler(sh)
39 | 
40 |     return _log
41 | 
42 | 
43 | def set_log_level(level: str | int) -> None:
44 |     """Set global log level.
45 | 
46 |     Args:
47 |         level: Either an int
48 |             (https://docs.python.org/3/library/logging.html#levels)
49 |             or one of the keywords, 'critical' (only the most terrifying of log
50 |             messages), 'error', 'warning', 'info',
51 |             'debug' (all log messages)
52 | 
53 |     Returns:
54 |         None
55 |     """
56 |     lvl = level
57 |     if isinstance(level, str):
58 |         lvl = getattr(logging, level.upper())
59 |     get_logger().setLevel(lvl)
60 | 
61 | 
62 | def set_debug_log_level() -> None:
63 |     """Set global log level to debug."""
64 |     set_log_level(logging.DEBUG)
65 | 
66 | 
67 | log = get_logger()
68 | 


--------------------------------------------------------------------------------
/ankipandas/util/misc.py:
--------------------------------------------------------------------------------
 1 | # std
 2 | from __future__ import annotations
 3 | 
 4 | import collections
 5 | from typing import Any
 6 | 
 7 | 
 8 | def invert_dict(dct: dict) -> dict:
 9 |     """Invert dictionary, i.e. reverse keys and values.
10 | 
11 |     Args:
12 |         dct: Dictionary
13 | 
14 |     Returns:
15 |         Dictionary with reversed keys and values.
16 | 
17 |     Raises:
18 |         :class:`ValueError` if values are not unique.
19 |     """
20 |     if not len(set(dct.values())) == len(dct.values()):
21 |         print(dct)
22 |         print(sorted(dct.values()))
23 |         raise ValueError("Dictionary does not seem to be invertible.")
24 |     return {value: key for key, value in dct.items()}
25 | 
26 | 
27 | def flatten_list_list(lst: list[list[Any]]) -> list[Any]:
28 |     """Takes a list of lists and returns a list of all elements.
29 | 
30 |     Args:
31 |         lst: List of Lists
32 | 
33 |     Returns:
34 |         list
35 |     """
36 |     return [item for sublist in lst for item in sublist]
37 | 
38 | 
39 | def nested_dict():
40 |     """This is very clever and stolen from
41 |     https://stackoverflow.com/questions/16724788/
42 |     Use it to initialize a dictionary-like object which automatically adds
43 |     levels.
44 |     E.g.
45 | 
46 |     .. code-block:: python
47 | 
48 |         a = nested_dict()
49 |         a['test']['this']['is']['working'] = "yaaay"
50 |     """
51 |     return collections.defaultdict(nested_dict)
52 | 
53 | 
54 | def defaultdict2dict(defdict: collections.defaultdict) -> dict:
55 |     return {
56 |         key: (
57 |             defaultdict2dict(value)
58 |             if isinstance(value, collections.defaultdict)
59 |             else value
60 |         )
61 |         for key, value in defdict.items()
62 |     }
63 | 


--------------------------------------------------------------------------------
/ankipandas/util/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/ankipandas/util/test/__init__.py


--------------------------------------------------------------------------------
/ankipandas/util/test/test_dataframe.py:
--------------------------------------------------------------------------------
 1 | # std
 2 | from __future__ import annotations
 3 | 
 4 | import unittest
 5 | 
 6 | # 3rd
 7 | import pandas as pd
 8 | 
 9 | # ours
10 | from ankipandas.util.dataframe import replace_df_inplace
11 | 
12 | 
13 | class TestUtils(unittest.TestCase):
14 |     def test__replace_df_inplace(self):
15 |         df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
16 |         df_new = pd.DataFrame({"a": [1]})
17 |         replace_df_inplace(df, df_new)
18 |         self.assertEqual(len(df), 1)
19 |         self.assertEqual(len(df.columns), 1)
20 |         self.assertListEqual(list(df["a"].values), [1])
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     unittest.main()
25 | 


--------------------------------------------------------------------------------
/ankipandas/util/test/test_log.py:
--------------------------------------------------------------------------------
 1 | # std
 2 | from __future__ import annotations
 3 | 
 4 | import unittest
 5 | 
 6 | # ours
 7 | from ankipandas.util.log import get_logger, log, set_log_level
 8 | 
 9 | 
10 | class TestLogging(unittest.TestCase):
11 |     """Only tests that things run without error."""
12 | 
13 |     def test_log(self):
14 |         log.info("Test info")
15 |         log.warning("Test warning")
16 | 
17 |     def test_get_logger(self):
18 |         get_logger().info("Test info")
19 |         get_logger().warning("Test warning")
20 | 
21 |     def test_set_log_level(self):
22 |         set_log_level("warning")
23 |         set_log_level("WARNING")
24 |         set_log_level(0)
25 | 
26 | 
27 | if __name__ == "__main__":
28 |     unittest.main()
29 | 


--------------------------------------------------------------------------------
/ankipandas/util/test/test_misc.py:
--------------------------------------------------------------------------------
 1 | # std
 2 | from __future__ import annotations
 3 | 
 4 | import unittest
 5 | 
 6 | # ours
 7 | from ankipandas.util.misc import invert_dict
 8 | 
 9 | 
10 | class TestInvertDict(unittest.TestCase):
11 |     def test_ok(self):
12 |         a = {1: 2, 3: 4, 5: 6}
13 |         self.assertDictEqual(invert_dict(a), {2: 1, 4: 3, 6: 5})
14 | 
15 |     def test_fails(self):
16 |         a = {1: 2, 3: 2}
17 |         with self.assertRaises(ValueError):
18 |             invert_dict(a)
19 | 
20 | 
21 | if __name__ == "__main__":
22 |     unittest.main()
23 | 


--------------------------------------------------------------------------------
/ankipandas/util/test/test_types.py:
--------------------------------------------------------------------------------
 1 | # std
 2 | from __future__ import annotations
 3 | 
 4 | import unittest
 5 | 
 6 | from ankipandas.util.log import set_debug_log_level
 7 | 
 8 | # ours
 9 | from ankipandas.util.types import (
10 |     is_dict_list_like,
11 |     is_list_dict_like,
12 |     is_list_like,
13 |     is_list_list_like,
14 | )
15 | 
16 | 
17 | class TestTypes(unittest.TestCase):
18 |     def setUp(self):
19 |         set_debug_log_level()
20 | 
21 |     def test_is_list_like(self):
22 |         self.assertTrue(is_list_like([1, 2]))
23 |         self.assertTrue(is_list_like((1, 2)))
24 |         self.assertFalse(is_list_like("asdf"))
25 | 
26 |     def test_is_list_list_like(self):
27 |         self.assertTrue(is_list_list_like([[1, 2]]))
28 |         self.assertTrue(is_list_list_like([(1, 2)]))
29 |         self.assertFalse(is_list_list_like([(1, 2), 3]))
30 | 
31 |     def test_is_list_dict_like(self):
32 |         self.assertTrue(is_list_dict_like([{1: 3}, {4: 5}]))
33 |         self.assertTrue(is_list_dict_like([]))
34 |         self.assertFalse(is_list_dict_like([(1, 2), (4, 5)]))
35 | 
36 |     def test_is_dict_list_like(self):
37 |         self.assertTrue(is_dict_list_like({1: [], 2: (3, 4)}))
38 |         self.assertTrue(is_dict_list_like({}))
39 |         self.assertFalse(is_dict_list_like([(1, 2), (4, 5)]))
40 | 
41 | 
42 | if __name__ == "__main__":
43 |     unittest.main()
44 | 


--------------------------------------------------------------------------------
/ankipandas/util/types.py:
--------------------------------------------------------------------------------
 1 | def is_list_like(obj):
 2 |     """True if object type is similar to list, tuple etc."""
 3 |     return isinstance(obj, (tuple, list))
 4 | 
 5 | 
 6 | def is_list_list_like(obj):
 7 |     """True if object is like-like object of list-like objects"""
 8 |     return is_list_like(obj) and all(map(is_list_like, obj))
 9 | 
10 | 
11 | def is_list_dict_like(obj):
12 |     """True if object is list-like object of dictionaries."""
13 |     return is_list_like(obj) and all(map(lambda x: isinstance(x, dict), obj))
14 | 
15 | 
16 | def is_dict_list_like(obj):
17 |     """True if object is dictionary with list-like objects as values."""
18 |     return isinstance(obj, dict) and all(map(is_list_like, obj.values()))
19 | 


--------------------------------------------------------------------------------
/ankipandas/version.txt:
--------------------------------------------------------------------------------
1 | 0.3.15
2 | 


--------------------------------------------------------------------------------
/codespell.txt:
--------------------------------------------------------------------------------
1 | hist
2 | inpt
3 | assertIn
4 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 21 | 
 22 | .PHONY: help
 23 | help:
 24 | 	@echo "Please use \`make <target>' where <target> is one of"
 25 | 	@echo "  html       to make standalone HTML files"
 26 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 27 | 	@echo "  singlehtml to make a single large HTML file"
 28 | 	@echo "  pickle     to make pickle files"
 29 | 	@echo "  json       to make JSON files"
 30 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 31 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 32 | 	@echo "  applehelp  to make an Apple Help Book"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 36 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 37 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 38 | 	@echo "  text       to make text files"
 39 | 	@echo "  man        to make manual pages"
 40 | 	@echo "  texinfo    to make Texinfo files"
 41 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 42 | 	@echo "  gettext    to make PO message catalogs"
 43 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 44 | 	@echo "  xml        to make Docutils-native XML files"
 45 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 46 | 	@echo "  linkcheck  to check all external links for integrity"
 47 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 48 | 	@echo "  coverage   to run coverage check of the documentation (if enabled)"
 49 | 
 50 | .PHONY: clean
 51 | clean:
 52 | 	rm -rf $(BUILDDIR)/*
 53 | 
 54 | .PHONY: html
 55 | html:
 56 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 57 | 	@echo
 58 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 59 | 
 60 | .PHONY: dirhtml
 61 | dirhtml:
 62 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 63 | 	@echo
 64 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 65 | 
 66 | .PHONY: singlehtml
 67 | singlehtml:
 68 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 69 | 	@echo
 70 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 71 | 
 72 | .PHONY: pickle
 73 | pickle:
 74 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 75 | 	@echo
 76 | 	@echo "Build finished; now you can process the pickle files."
 77 | 
 78 | .PHONY: json
 79 | json:
 80 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 81 | 	@echo
 82 | 	@echo "Build finished; now you can process the JSON files."
 83 | 
 84 | .PHONY: htmlhelp
 85 | htmlhelp:
 86 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 87 | 	@echo
 88 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 89 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 90 | 
 91 | .PHONY: qthelp
 92 | qthelp:
 93 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 94 | 	@echo
 95 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 96 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 97 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/BDecaysKinematicClustering.qhcp"
 98 | 	@echo "To view the help file:"
 99 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/BDecaysKinematicClustering.qhc"
100 | 
101 | .PHONY: applehelp
102 | applehelp:
103 | 	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
104 | 	@echo
105 | 	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
106 | 	@echo "N.B. You won't be able to view it unless you put it in" \
107 | 	      "~/Library/Documentation/Help or install it in your application" \
108 | 	      "bundle."
109 | 
110 | .PHONY: devhelp
111 | devhelp:
112 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
113 | 	@echo
114 | 	@echo "Build finished."
115 | 	@echo "To view the help file:"
116 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/BDecaysKinematicClustering"
117 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/BDecaysKinematicClustering"
118 | 	@echo "# devhelp"
119 | 
120 | .PHONY: epub
121 | epub:
122 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
123 | 	@echo
124 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
125 | 
126 | .PHONY: latex
127 | latex:
128 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
129 | 	@echo
130 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
131 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
132 | 	      "(use \`make latexpdf' here to do that automatically)."
133 | 
134 | .PHONY: latexpdf
135 | latexpdf:
136 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
137 | 	@echo "Running LaTeX files through pdflatex..."
138 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
139 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
140 | 
141 | .PHONY: latexpdfja
142 | latexpdfja:
143 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
144 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
145 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
146 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
147 | 
148 | .PHONY: text
149 | text:
150 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
151 | 	@echo
152 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
153 | 
154 | .PHONY: man
155 | man:
156 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
157 | 	@echo
158 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
159 | 
160 | .PHONY: texinfo
161 | texinfo:
162 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
163 | 	@echo
164 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
165 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
166 | 	      "(use \`make info' here to do that automatically)."
167 | 
168 | .PHONY: info
169 | info:
170 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
171 | 	@echo "Running Texinfo files through makeinfo..."
172 | 	make -C $(BUILDDIR)/texinfo info
173 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
174 | 
175 | .PHONY: gettext
176 | gettext:
177 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
178 | 	@echo
179 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
180 | 
181 | .PHONY: changes
182 | changes:
183 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
184 | 	@echo
185 | 	@echo "The overview file is in $(BUILDDIR)/changes."
186 | 
187 | .PHONY: linkcheck
188 | linkcheck:
189 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
190 | 	@echo
191 | 	@echo "Link check complete; look for any errors in the above output " \
192 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
193 | 
194 | .PHONY: doctest
195 | doctest:
196 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
197 | 	@echo "Testing of doctests in the sources finished, look at the " \
198 | 	      "results in $(BUILDDIR)/doctest/output.txt."
199 | 
200 | .PHONY: coverage
201 | coverage:
202 | 	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
203 | 	@echo "Testing of coverage in the sources finished, look at the " \
204 | 	      "results in $(BUILDDIR)/coverage/python.txt."
205 | 
206 | .PHONY: xml
207 | xml:
208 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
209 | 	@echo
210 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
211 | 
212 | .PHONY: pseudoxml
213 | pseudoxml:
214 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
215 | 	@echo
216 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
217 | 


--------------------------------------------------------------------------------
/doc/ankidf.rst:
--------------------------------------------------------------------------------
 1 | AnkiDataFrame
 2 | ^^^^^^^^^^^^^
 3 | 
 4 | The class :class:`~ankipandas.ankidf.AnkiDataFrame` is the central data
 5 | structure in which we provide the notes, cards and review tables.
 6 | Access it via an instance of :class:`~ankipandas.collection.Collection`.
 7 | 
 8 | Example:
 9 | 
10 | .. code-block:: python
11 | 
12 |     from ankipandas import Collection
13 |     col = Collection()
14 | 
15 |     col.notes  # Notes as AnkiDataFrame
16 |     col.cards  # Cards as AnkiDataFrame
17 |     col.revs   # Reviews as AnkiDataFrame
18 | 
19 | .. autoclass:: ankipandas.ankidf.AnkiDataFrame
20 |     :members:
21 |     :undoc-members:
22 |     :exclude-members: equals, update, append
23 | 


--------------------------------------------------------------------------------
/doc/collection.rst:
--------------------------------------------------------------------------------
 1 | Collection
 2 | ^^^^^^^^^^
 3 | 
 4 | This is the starting point for most end-users.
 5 | The :class:`~ankipandas.collection.Collection` class loads the Anki collection
 6 | and provides access to its notes, cards and reviews as pandas
 7 | :class:`~pandas.DataFrame` objects.
 8 | 
 9 | .. autoclass:: ankipandas.collection.Collection
10 |     :members:
11 |     :undoc-members:
12 | 


--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import os
  4 | import pathlib
  5 | import sys
  6 | from pathlib import Path
  7 | 
  8 | # If extensions (or modules to document with autodoc) are in another directory,
  9 | # add these directories to sys.path here. If the directory is relative to the
 10 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 11 | sys.path.insert(0, os.path.abspath("../"))
 12 | 
 13 | readme_path = Path(__file__).parent.resolve().parent / "README.md"
 14 | readme_target = Path(__file__).parent / "readme.md"
 15 | 
 16 | with readme_target.open("w") as outf:
 17 |     outf.write(
 18 |         "\n".join(
 19 |             [
 20 |                 "Readme",
 21 |                 "======",
 22 |                 "",
 23 |             ]
 24 |         )
 25 |     )
 26 |     lines = []
 27 |     for line in readme_path.read_text().split("\n"):
 28 |         lines.append(line)
 29 |     outf.write("\n".join(lines))
 30 | 
 31 | 
 32 | # -- General configuration ------------------------------------------------
 33 | 
 34 | # If your documentation needs a minimal Sphinx version, state it here.
 35 | # needs_sphinx = '1.0'
 36 | 
 37 | autodoc_mock_imports = []
 38 | 
 39 | autodoc_default_options = {
 40 |     "special-members": "__init__",
 41 |     "undoc-members": True,
 42 |     "show-inheritance": True,
 43 | }
 44 | 
 45 | autodoc_inherit_docstrings = False
 46 | 
 47 | autoclass_content = "class"
 48 | 
 49 | # https://stackoverflow.com/questions/12772927/
 50 | # Will only work for sphinx > 1.4 onward
 51 | suppress_warnings = ["image.nonlocal_uri"]
 52 | 
 53 | # Add any Sphinx extension module names here, as strings. They can be
 54 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 55 | # ones.
 56 | extensions = [
 57 |     "sphinx.ext.autodoc",
 58 |     "sphinx.ext.todo",
 59 |     "sphinx.ext.coverage",
 60 |     "sphinx.ext.mathjax",
 61 |     "sphinx.ext.ifconfig",
 62 |     "sphinx.ext.viewcode",
 63 |     "sphinx.ext.napoleon",
 64 |     "sphinx.ext.intersphinx",
 65 |     "recommonmark",
 66 | ]
 67 | 
 68 | intersphinx_mapping = {
 69 |     "python": ("https://docs.python.org/3", None),
 70 |     "pandas": ("http://pandas-docs.github.io/pandas-docs-travis/", None),
 71 | }
 72 | 
 73 | # Add any paths that contain templates here, relative to this directory.
 74 | templates_path = ["_templates"]
 75 | 
 76 | # The suffix(es) of source filenames.
 77 | # You can specify multiple suffix as a list of string:
 78 | source_suffix = [".rst", ".md"]
 79 | # source_suffix = ".rst"
 80 | 
 81 | # The encoding of source files.
 82 | # source_encoding = 'utf-8-sig'
 83 | 
 84 | # The master toctree document.
 85 | master_doc = "index"
 86 | 
 87 | # General information about the project.
 88 | project = "AnkiPandas"
 89 | copyright = "2019, Kilian Lieret"
 90 | author = "Kilian Lieret"
 91 | 
 92 | # The version info for the project you're documenting, acts as replacement for
 93 | # |version| and |release|, also used in various other places throughout the
 94 | # built documents.
 95 | #
 96 | 
 97 | this_dir = pathlib.Path(__file__).resolve().parent
 98 | with (this_dir / ".." / "ankipandas" / "version.txt").open() as vf:
 99 |     version = vf.read().strip()
100 | print(f"Version as read from version.txt: '{version}'")
101 | 
102 | # The short X.Y version.
103 | # version = 'dev'
104 | # The full version, including alpha/beta/rc tags.
105 | release = version
106 | 
107 | # The language for content autogenerated by Sphinx. Refer to documentation
108 | # for a list of supported languages.
109 | #
110 | # This is also used if you do content translation via gettext catalogs.
111 | # Usually you set "language" from the command line for these cases.
112 | language = None
113 | 
114 | # There are two options for replacing |today|: either, you set today to some
115 | # non-false value, then it is used:
116 | # today = ''
117 | # Else, today_fmt is used as the format for a strftime call.
118 | # today_fmt = '%B %d, %Y'
119 | 
120 | # List of patterns, relative to source directory, that match files and
121 | # directories to ignore when looking for source files.
122 | exclude_patterns = ["_build"]
123 | 
124 | # The reST default role (used for this markup: `text`) to use for all
125 | # documents.
126 | # default_role = None
127 | 
128 | # If true, '()' will be appended to :func: etc. cross-reference text.
129 | # add_function_parentheses = True
130 | 
131 | # If true, the current module name will be prepended to all description
132 | # unit titles (such as .. function::).
133 | # add_module_names = True
134 | 
135 | # If true, sectionauthor and moduleauthor directives will be shown in the
136 | # output. They are ignored by default.
137 | # show_authors = False
138 | 
139 | # The name of the Pygments (syntax highlighting) style to use.
140 | pygments_style = "sphinx"
141 | 
142 | # A list of ignored prefixes for module index sorting.
143 | # modindex_common_prefix = []
144 | 
145 | # If true, keep warnings as "system message" paragraphs in the built documents.
146 | # keep_warnings = False
147 | 
148 | # If true, `todo` and `todoList` produce output, else they produce nothing.
149 | todo_include_todos = True
150 | 
151 | # -- Options for HTML output ----------------------------------------------
152 | 
153 | # The theme to use for HTML and HTML Help pages.  See the documentation for
154 | # a list of builtin themes.
155 | # html_theme = 'alabaster'
156 | 
157 | # Add any paths that contain custom themes here, relative to this directory.
158 | # html_theme_path = ["_themes"]
159 | 
160 | try:
161 |     import importlib
162 | 
163 |     theme = importlib.import_module("sphinx_book_theme")
164 |     html_theme = "sphinx_book_theme"
165 |     html_theme_path = [theme.get_html_theme_path()]
166 | except ImportError:
167 |     print(
168 |         "**** WARNING ****: reverting to default theme, because "
169 |         "sphinx_book_theme is not installed"
170 |     )
171 |     html_theme = "default"
172 | print(f"html_theme='{html_theme}'")
173 | 
174 | # Theme options are theme-specific and customize the look and feel of a theme
175 | # further.  For a list of options available for each theme, see the
176 | # documentation.
177 | # html_theme_options = {}
178 | 
179 | if html_theme == "sphinx_rtd_theme":
180 |     html_theme_options = {"logo_only": True}
181 | else:
182 |     html_theme_options = {}
183 | print(f"html_theme_options={html_theme_options}")
184 | 
185 | if html_theme == "sphinx_rtd_theme":
186 |     html_logo = "../misc/logo/logo_rtd.svg"
187 | else:
188 |     html_logo = "../misc/logo/logo.svg"
189 | 
190 | # The name for this set of Sphinx documents.  If None, it defaults to
191 | # "<project> v<release> documentation".
192 | # html_title = None
193 | 
194 | # A shorter title for the navigation bar.  Default is the same as html_title.
195 | # html_short_title = None
196 | 
197 | # The name of an image file (relative to this directory) to place at the top
198 | # of the sidebar.
199 | 
200 | # The name of an image file (relative to this directory) to use as a favicon of
201 | # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
202 | # pixels large.
203 | # html_favicon = None
204 | 
205 | # Add any paths that contain custom static files (such as style sheets) here,
206 | # relative to this directory. They are copied after the builtin static files,
207 | # so a file named "default.css" will overwrite the builtin "default.css".
208 | # html_static_path = ['_static']
209 | 
210 | 
211 | # Output file base name for HTML help builder.
212 | htmlhelp_basename = "AnkiPandas"
213 | 
214 | # -- Options for LaTeX output ---------------------------------------------
215 | 
216 | latex_elements = {
217 |     # The paper size ('letterpaper' or 'a4paper').
218 |     # 'papersize': 'letterpaper',
219 |     # The font size ('10pt', '11pt' or '12pt').
220 |     # 'pointsize': '10pt',
221 |     # Additional stuff for the LaTeX preamble.
222 |     # 'preamble': '',
223 |     # Latex figure (float) alignment
224 |     # 'figure_align': 'htbp',
225 | }
226 | 
227 | # Grouping the document tree into LaTeX files. List of tuples
228 | # (source start file, target name, title,
229 | #  author, documentclass [howto, manual, or own class]).
230 | latex_documents = [
231 |     (master_doc, "ankipandas.tex", "AnkiPandas", "Kilian Lieret", "manual")
232 | ]
233 | 
234 | # -- Options for manual page output ---------------------------------------
235 | 
236 | # One entry per manual page. List of tuples
237 | # (source start file, name, description, authors, manual section).
238 | man_pages = [(master_doc, "ankipandas", "AnkiPandas", [author], 1)]
239 | 
240 | # If true, show URL addresses after external links.
241 | # man_show_urls = False
242 | 
243 | 
244 | # -- Options for Texinfo output -------------------------------------------
245 | 
246 | # Grouping the document tree into Texinfo files. List of tuples
247 | # (source start file, target name, title, author,
248 | #  dir menu entry, description, category)
249 | texinfo_documents = [
250 |     (
251 |         master_doc,
252 |         "ankipandas",
253 |         "AnkiPandas",
254 |         author,
255 |         "AnkiPandas",
256 |         "Load your anki database as a pandas DataFrame with just one "
257 |         "line of code!",
258 |         "Miscellaneous",
259 |     )
260 | ]
261 | 
262 | # Documents to append as an appendix to all manuals.
263 | # texinfo_appendices = []
264 | 
265 | # If false, no module index is generated.
266 | # texinfo_domain_indices = True
267 | 
268 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
269 | # texinfo_show_urls = 'footnote'
270 | 
271 | # If true, do not generate a @detailmenu in the "Top" node's menu.
272 | # texinfo_no_detailmenu = False
273 | 
274 | 
275 | # -- Options for Epub output ----------------------------------------------
276 | 
277 | # Bibliographic Dublin Core info.
278 | epub_title = project
279 | epub_author = author
280 | epub_publisher = author
281 | epub_copyright = copyright
282 | 
283 | # The basename for the epub file. It defaults to the project name.
284 | # epub_basename = project
285 | 
286 | # The HTML theme for the epub output. Since the default themes are not
287 | # optimized for small screen space, using the same theme for HTML and epub
288 | # output is usually not wise. This defaults to 'epub', a theme designed to save
289 | # visual space.
290 | # epub_theme = 'epub'
291 | 
292 | # The language of the text. It defaults to the language option
293 | # or 'en' if the language is not set.
294 | # epub_language = ''
295 | 
296 | # The scheme of the identifier. Typical schemes are ISBN or URL.
297 | # epub_scheme = ''
298 | 
299 | # The unique identifier of the text. This can be a ISBN number
300 | # or the project homepage.
301 | # epub_identifier = ''
302 | 
303 | # A unique identification for the text.
304 | # epub_uid = ''
305 | 
306 | # A tuple containing the cover image and cover page html template filenames.
307 | # epub_cover = ()
308 | 
309 | # A sequence of (type, uri, title) tuples for the guide element of content.opf.
310 | # epub_guide = ()
311 | 
312 | # HTML files that should be inserted before the pages created by sphinx.
313 | # The format is a list of tuples containing the path and title.
314 | # epub_pre_files = []
315 | 
316 | # HTML files that should be inserted after the pages created by sphinx.
317 | # The format is a list of tuples containing the path and title.
318 | # epub_post_files = []
319 | 
320 | # A list of files that should not be packed into the epub file.
321 | epub_exclude_files = ["search.html"]
322 | 
323 | # The depth of the table of contents in toc.ncx.
324 | # epub_tocdepth = 3
325 | 
326 | # Allow duplicate toc entries.
327 | # epub_tocdup = True
328 | 
329 | # Choose between 'default' and 'includehidden'.
330 | # epub_tocscope = 'default'
331 | 
332 | # Fix unsupported image types using the Pillow.
333 | # epub_fix_images = False
334 | 
335 | # Scale large images.
336 | # epub_max_image_width = 0
337 | 
338 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
339 | # epub_show_urls = 'inline'
340 | 
341 | # If false, no index is generated.
342 | # epub_use_index = True
343 | 
344 | # Do not order alphabetically but by source
345 | autodoc_member_order = "bysource"
346 | 


--------------------------------------------------------------------------------
/doc/examples.rst:
--------------------------------------------------------------------------------
 1 | Analysis
 2 | --------
 3 | 
 4 | .. note::
 5 | 
 6 |     All examples assume the line
 7 | 
 8 |     .. code-block:: python
 9 | 
10 |         col = Collection()
11 | 
12 |     Or ``col = Collection("/path/to/col.anki2")``, etc.
13 | 
14 | In which deck are the most leeches?
15 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
16 | 
17 | .. literalinclude:: examples/examples/leeches_per_deck.py
18 |     :linenos:
19 | 
20 | .. image:: examples/plots/leeches_per_deck.png
21 | 
22 | Which deck has the longest average retention rates?
23 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
24 | 
25 | .. literalinclude:: examples/examples/retention_rate_per_deck.py
26 |     :linenos:
27 | 
28 | .. image:: examples/plots/retention_rate_per_deck.png
29 | 
30 | Repetitions vs type
31 | ^^^^^^^^^^^^^^^^^^^
32 | 
33 | Minimal:
34 | 
35 | .. code-block:: python
36 | 
37 |     col.cards.hist("crepts", by="ctype")
38 | 
39 | Prettier:
40 | 
41 | .. literalinclude:: examples/examples/repetitions_per_type.py
42 |     :linenos:
43 | 
44 | .. image:: examples/plots/repetitions_per_type.png
45 | 
46 | Repetitions vs deck
47 | ^^^^^^^^^^^^^^^^^^^
48 | 
49 | One liner:
50 | 
51 | .. code-block:: python
52 | 
53 |     col.cards.hist(column="creps", by="cdeck")
54 | 
55 | Prettier:
56 | 
57 | .. literalinclude:: examples/examples/repetitions_per_deck.py
58 |     :linenos:
59 | 
60 | .. image:: examples/plots/repetitions_per_deck.png
61 | 
62 | Retention distribution vs deck
63 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
64 | 
65 | .. literalinclude:: examples/examples/retention_distribution_vs_deck.py
66 |     :linenos:
67 | 
68 | .. image:: examples/plots/retention_distribution_vs_deck.png
69 | 
70 | Reviews vs retention length vs deck
71 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
72 | 
73 | .. literalinclude:: examples/examples/reviews_vs_ease.py
74 |     :linenos:
75 | 
76 | .. image:: examples/plots/reviews_vs_ease.png
77 | 


--------------------------------------------------------------------------------
/doc/examples/col.anki2:
--------------------------------------------------------------------------------
1 | /home/fuchur/.local/share/Anki2/fuchurMain/collection.anki2


--------------------------------------------------------------------------------
/doc/examples/examples/leeches_per_deck.py:
--------------------------------------------------------------------------------
1 | cards = col.cards.merge_notes()
2 | counts = cards[cards.has_tag("leech")]["cdeck"].value_counts()
3 | counts.plot.pie(title="Leeches per deck")
4 | 


--------------------------------------------------------------------------------
/doc/examples/examples/repetitions_per_deck.py:
--------------------------------------------------------------------------------
 1 | interesting_decks = list(col.cards.cdeck.unique())
 2 | interesting_decks.remove("archived::physics")
 3 | selected = col.cards[col.cards.cdeck.isin(interesting_decks)]
 4 | axss = selected.hist(
 5 |     column="creps",
 6 |     by="cdeck",
 7 |     sharex=True,
 8 |     layout=(5, 4),
 9 |     figsize=(15, 15),
10 |     density=True,
11 | )
12 | for axs in axss:
13 |     for ax in axs:
14 |         ax.set_xlabel("#Reviews")
15 |         ax.set_ylabel("Count")
16 | 


--------------------------------------------------------------------------------
/doc/examples/examples/repetitions_per_type.py:
--------------------------------------------------------------------------------
1 | axs = col.cards.hist(column="creps", by="ctype", layout=(1, 2), figsize=(12, 3))
2 | for ax in axs:
3 |     ax.set_xlabel("#Reviews")
4 |     ax.set_ylabel("Count")
5 | 


--------------------------------------------------------------------------------
/doc/examples/examples/retention_distribution_vs_deck.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import matplotlib.pyplot as plt
 4 | import numpy as np
 5 | 
 6 | ax = plt.gca()
 7 | for deck in col.cards.cdeck.unique():
 8 |     selected = col.cards[col.cards.cdeck == deck]["civl"]
 9 |     if len(selected) < 1000:
10 |         continue
11 |     selected.plot.hist(
12 |         ax=ax,
13 |         label=deck,
14 |         histtype="step",
15 |         linewidth=2,
16 |         xlim=(0, 365),
17 |         bins=np.linspace(0, 365, 10),
18 |     )
19 | ax.set_xlabel("Predicted retention length (review interval)")
20 | ax.set_ylabel("Number of cards")
21 | ax.set_title("Expected retention length per deck [days]")
22 | ax.legend(frameon=False)
23 | 


--------------------------------------------------------------------------------
/doc/examples/examples/retention_rate_per_deck.py:
--------------------------------------------------------------------------------
1 | grouped = col.cards.groupby("cdeck")
2 | data = grouped.mean()["civl"].sort_values().tail()
3 | ax = data.plot.barh()
4 | ax.set_ylabel("Deck name")
5 | ax.set_xlabel("Average expected retention length/review interval [days]")
6 | ax.set_title("Average retention length per deck")
7 | 


--------------------------------------------------------------------------------
/doc/examples/examples/reviews_vs_ease.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import pandas as pd
 4 | 
 5 | xs = []
 6 | ys = []
 7 | decks = []
 8 | for deck in col.cards.cdeck.unique():
 9 |     selected = col.cards[col.cards["cdeck"] == deck]
10 |     if len(selected) < 500:
11 |         continue
12 |     decks.append(deck)
13 |     binned = pd.qcut(selected["creps"], 15, duplicates="drop")
14 |     results = selected.groupby(binned)["civl"].mean()
15 |     y = results.tolist()
16 |     x = results.index.map(lambda x: x.mid).tolist()
17 |     xs.append(x)
18 |     ys.append(y)
19 | 
20 | ax = plt.gca()
21 | for i in range(len(xs)):
22 |     ax.plot(xs[i], ys[i], "o-", label=decks[i])
23 | ax.set_xlabel("#Reviews")
24 | ax.set_ylabel("Expected retention length/review interval [days]")
25 | ax.set_title("Number of reviews vs retention length")
26 | ax.legend(frameon=False)
27 | 


--------------------------------------------------------------------------------
/doc/examples/loader.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | # std
 4 | from __future__ import annotations
 5 | 
 6 | import logging
 7 | import os
 8 | import sys
 9 | from pathlib import Path
10 | 
11 | # 3rd
12 | import matplotlib.pyplot as plt
13 | 
14 | # ours
15 | sys.path.insert(0, "../..")
16 | import ankipandas  # noqa E402
17 | from ankipandas.util.log import get_logger  # noqa E402
18 | 
19 | 
20 | class Loader:
21 |     def __init__(self):
22 |         this_dir = Path(__file__).parent
23 |         self.col_path = this_dir / "col.anki2"
24 |         self.examples_dir = this_dir / "examples"
25 |         self.output_dir = this_dir / "plots"
26 |         self.log = get_logger()
27 |         self.log.setLevel(logging.DEBUG)
28 | 
29 |     def get_examples(self):
30 |         examples = []
31 |         for root, _, files in os.walk(str(self.examples_dir)):
32 |             for file in files:
33 |                 examples.append(Path(root) / file)
34 |         return examples
35 | 
36 |     def run_example(self, path: Path, save=True):
37 |         self.log.info("Running example %s", path)
38 |         col = ankipandas.Collection(self.col_path)  # noqa F841
39 |         with path.open("r") as example_file:
40 |             exec(example_file.read())
41 |         if save:
42 |             out = self.output_dir.resolve() / (path.resolve().stem + ".png")
43 |             self.log.info("Plotting to %s", out)
44 |             plt.savefig(out, bbox_inches="tight", transparent=True, dpi=75)
45 |             plt.cla()
46 |             plt.clf()
47 |             plt.close()
48 | 
49 |     def run_all(self, **kwargs):
50 |         for example in self.get_examples():
51 |             self.run_example(example, **kwargs)
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     loader = Loader()
56 |     loader.run_all()
57 | 


--------------------------------------------------------------------------------
/doc/examples/plots/leeches_per_deck.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/leeches_per_deck.png


--------------------------------------------------------------------------------
/doc/examples/plots/repetitions_per_deck.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/repetitions_per_deck.png


--------------------------------------------------------------------------------
/doc/examples/plots/repetitions_per_type.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/repetitions_per_type.png


--------------------------------------------------------------------------------
/doc/examples/plots/retention_distribution_vs_deck.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/retention_distribution_vs_deck.png


--------------------------------------------------------------------------------
/doc/examples/plots/retention_rate_per_deck.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/retention_rate_per_deck.png


--------------------------------------------------------------------------------
/doc/examples/plots/reviews_vs_ease.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/doc/examples/plots/reviews_vs_ease.png


--------------------------------------------------------------------------------
/doc/faq.md:
--------------------------------------------------------------------------------
 1 | Questions & Answers
 2 | ===================
 3 | 
 4 | > What do the different columns mean?
 5 | 
 6 | Please use the `help`, `help_cols`, or `help_col` methods of the `AnkiDataFrame`
 7 | object to display information about the columns.
 8 | 
 9 | > How to get the creation time of a card/note?
10 | 
11 | The IDs of the cards/notes correspond to the creation time.
12 | See [issue #112](https://github.com/klieret/AnkiPandas/issues/112) for a small
13 | code snippet to convert it to a ``datetime`` object.
14 | 
15 | > Can I access deck settings (e.g., the card intake per day) from `ankipandas`?
16 | 
17 | This is currently not supported by `ankipandas`. However, you can find related
18 | discussion in [issue #113](https://github.com/klieret/AnkiPandas/issues/113).
19 | 


--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
 1 | AnkiPandas
 2 | ==========
 3 | 
 4 | Load your anki database as a pandas DataFrame with just one
 5 | line of code!
 6 | 
 7 | .. toctree::
 8 |     :maxdepth: 2
 9 |     :caption: Basics
10 | 
11 |     readme
12 |     troubleshooting
13 |     faq
14 | 
15 | .. toctree::
16 |     :maxdepth: 2
17 |     :caption: Examples
18 | 
19 |     examples
20 |     projects_with_ap
21 | 
22 | .. toctree::
23 |     :maxdepth: 2
24 |     :caption: Module documentation
25 | 
26 |     collection
27 |     ankidf
28 |     paths
29 |     raw
30 |     util
31 | 


--------------------------------------------------------------------------------
/doc/paths.rst:
--------------------------------------------------------------------------------
1 | Paths and Searching
2 | ^^^^^^^^^^^^^^^^^^^
3 | 
4 | 
5 | .. automodule:: ankipandas.paths
6 |     :members:
7 |     :undoc-members:
8 | 


--------------------------------------------------------------------------------
/doc/projects_with_ap.rst:
--------------------------------------------------------------------------------
 1 | Projects that use AnkiPandas
 2 | ----------------------------
 3 | 
 4 | .. note::
 5 | 
 6 |     To add your project, please open an `issue <https://github.com/klieret/AnkiPandas/issues>`_.
 7 | 
 8 | * `cryptocoinserver/AnkiCardSimilarity.py <https://gist.github.com/cryptocoinserver/399eff4505708bca8f7074ab6eebe8cb>`_  A script that checks for similar cards in a deck and adds tags to them. It uses TF-IDF and cosine similarity to calculate the similarity between cards.
 9 | * `thiswillbeyourgithub/DocToolsLLM <https://github.com/thiswillbeyourgithub/DocToolsLLM/>`_: AI powered Q&A on any kind of document or combination of documents, including entire anki collections, pdfs, videos, youtube playlists etc.
10 | * `thiswillbeyourgithub/Anki-Semantic-Search <https://github.com/thiswillbeyourgithub/Anki-Semantic-Search>`_: Use AI (fastText's multilingual word vectors) to search through your anki notes!
11 | 


--------------------------------------------------------------------------------
/doc/raw.rst:
--------------------------------------------------------------------------------
1 | Internals
2 | ^^^^^^^^^
3 | 
4 | 
5 | .. automodule:: ankipandas.raw
6 |     :members:
7 |     :undoc-members:
8 | 


--------------------------------------------------------------------------------
/doc/requirements.txt:
--------------------------------------------------------------------------------
1 | recommonmark
2 | sphinx_book_theme
3 | git+https://github.com/klieret/AnkiPandas.git
4 | 


--------------------------------------------------------------------------------
/doc/troubleshooting.rst:
--------------------------------------------------------------------------------
 1 | Troubleshooting
 2 | ---------------
 3 | 
 4 | Getting help
 5 | ^^^^^^^^^^^^
 6 | 
 7 | Submit an `issue on github`_. Thank you for
 8 | improving this toolkit with me!
 9 | 
10 | .. _issue on github: https://github.com/klieret/ankipandas/issues
11 | 
12 | Common problems
13 | ^^^^^^^^^^^^^^^
14 | 
15 | * **Locked database**: While Anki is running, your database will be locked and
16 |   you might not be able to access it. Simply close Anki and try again. Similarly
17 |   Anki might refuse to open the database if ``ankipandas`` has currently opened
18 |   it (be it in a Jupyter notebook or in a currently running project).
19 | 
20 | .. note::
21 | 
22 |     Any unlisted problem that you ran into (and solved)? Help others by bringing
23 |     it to my attention_. Please check if there is already an issue created for
24 |     it by going through this list_.
25 | 
26 | .. _attention: https://github.com/klieret/ankipandas/issues
27 | .. _list: https://github.com/klieret/AnkiPandas/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc+label%3Abug
28 | 
29 | 
30 | Debugging
31 | ^^^^^^^^^
32 | 
33 | For better debugging, you can increase the log level of ``ankipandas``:
34 | 
35 | .. code-block:: python
36 | 
37 |     ankipandas.set_log_level("debug")
38 | 


--------------------------------------------------------------------------------
/doc/util.rst:
--------------------------------------------------------------------------------
 1 | Utilities
 2 | ^^^^^^^^^
 3 | 
 4 | .. automodule:: ankipandas.util
 5 | 
 6 | .. automodule:: ankipandas.util.log
 7 |     :members:
 8 |     :undoc-members:
 9 | 
10 | .. automodule:: ankipandas.util.dataframe
11 |     :members:
12 |     :undoc-members:
13 | 
14 | .. automodule:: ankipandas.util.misc
15 |     :members:
16 |     :undoc-members:
17 | 
18 | .. automodule:: ankipandas.util.checksum
19 |     :members:
20 |     :undoc-members:
21 | 


--------------------------------------------------------------------------------
/misc/logo/logo_github.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klieret/AnkiPandas/0b17a1870711d4adc3e9fc82bff8aac986b09f5e/misc/logo/logo_github.png


--------------------------------------------------------------------------------
/mlc_config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "ignorePatterns": [
3 |     {
4 |       "pattern": "https://github.com/issues?.*"
5 |     }
6 |   ]
7 | }
8 | 


--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | ignore_missing_imports = True
3 | follow_imports = silent
4 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 80
3 | target-version = ["py37"]
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | colorlog
3 | randomfiletree
4 | numpy
5 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = ankipandas
 3 | description = Load your anki database as a pandas DataFrame with just one line of code!
 4 | url = https://github.com/klieret/ankipandas
 5 | version = file: ankipandas/version.txt
 6 | long_description = file: README.md
 7 | long_description_content_type = text/markdown
 8 | license = MIT
 9 | license_file = LICENSE.txt
10 | python_requires = >=3.7
11 | project_urls =
12 |     Bug Tracker =   https://github.com/klieret/ankipandas/issues
13 |     Documentation = https://ankipandas.readthedocs.io/
14 |     Source Code =   https://github.com/klieret/ankipandas/
15 | keywords =
16 |     anki
17 |     pandas
18 |     dataframe
19 | classifiers =
20 |     Programming Language :: Python :: 3
21 |     Programming Language :: Python :: 3.7
22 |     Programming Language :: Python :: 3.8
23 |     Programming Language :: Python :: 3.9
24 |     Programming Language :: Python :: 3.10
25 |     License :: OSI Approved :: MIT License
26 |     Operating System :: OS Independent
27 |     Topic :: Database
28 |     Topic :: Education
29 |     Topic :: Utilities
30 | 
31 | [options]
32 | packages = find:
33 | include_package_data = True
34 | zip_safe = False
35 | 
36 | 
37 | [tool:pytest]
38 | addopts = --cov=ankipandas
39 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """ To install this package, change to the directory of this file and run
 4 | 
 5 |     pip3 install --user .
 6 | 
 7 | (the ``--user`` flag installs the package for your user account only, otherwise
 8 | you will need administrator rights).
 9 | """
10 | 
11 | from __future__ import annotations
12 | 
13 | # std
14 | import site
15 | import sys
16 | from pathlib import Path
17 | 
18 | # noinspection PyUnresolvedReferences
19 | import setuptools
20 | 
21 | # Sometimes editable install fails with an error message about user site
22 | # being not writeable. The following line can fix that, see
23 | # https://github.com/pypa/pip/issues/7953
24 | site.ENABLE_USER_SITE = "--user" in sys.argv[1:]
25 | 
26 | this_dir = Path(__file__).resolve().parent
27 | 
28 | setuptools.setup(
29 |     install_requires=[
30 |         req.strip()
31 |         for req in (this_dir / "requirements.txt").read_text().splitlines()
32 |         if req.strip() and not req.startswith("#")
33 |     ]
34 | )
35 | 


--------------------------------------------------------------------------------