├── src └── docstore │ ├── __init__.py │ ├── static │ ├── stacks.ico │ ├── specktre.png │ ├── natural_paper.png │ ├── generic_document.png │ ├── stacks.svg │ └── style.css │ ├── templates │ ├── _title_attribution.html │ ├── _head.html │ ├── _tag_list.html │ ├── _meta_info.html │ └── index.html │ ├── git.py │ ├── merging.py │ ├── downloads.py │ ├── tag_cloud.py │ ├── file_normalisation.py │ ├── text_utils.py │ ├── tint_colors.py │ ├── models.py │ ├── thumbnails.py │ ├── tag_list.py │ ├── server.py │ ├── documents.py │ └── cli.py ├── .dockerignore ├── .gitignore ├── tests ├── stubs │ ├── smartypants.pyi │ └── wcag_contrast_ratio.pyi ├── files │ ├── cluster.png │ ├── snakes.pdf │ ├── Newtons_cradle.gif │ ├── cluster_segment.png │ ├── Rotating_earth_(large).gif │ ├── Rotating_earth_(large)_singleframe.gif │ └── credits.txt ├── test_tag_list.py ├── conftest.py ├── test_downloads.py ├── test_tint_colors.py ├── test_models.py ├── test_thumbnails.py ├── test_merging.py ├── test_text_utils.py ├── test_file_normalisation.py ├── test_documents.py ├── test_cli.py └── test_server.py ├── docstore.png ├── migrations ├── exceptions.py ├── from_2-0-0_to_2-1-0.py └── from_2-1-0_to_2-2-0.py ├── docs ├── quick_look.png ├── thumbnails.png ├── tint_colors.png ├── previewing-the-files.md ├── storing-the-metadata.md └── storing-the-files.md ├── .gitattributes ├── dev_requirements.in ├── requirements.in ├── .github ├── dependabot.yml ├── workflows │ └── test.yml └── install-github-bin ├── pyproject.toml ├── requirements.txt ├── LICENSE ├── dev_requirements.txt └── README.md /src/docstore/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | **/*.pyc 2 | .hypothesis 3 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | .tox 2 | *.egg-info 3 | *.pyc 4 | .coverage 5 | -------------------------------------------------------------------------------- /tests/stubs/smartypants.pyi: -------------------------------------------------------------------------------- 1 | def smartypants(s: str) -> str: ... 2 | -------------------------------------------------------------------------------- /docstore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/docstore.png -------------------------------------------------------------------------------- /migrations/exceptions.py: -------------------------------------------------------------------------------- 1 | class IncorrectSchemaError(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /docs/quick_look.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/docs/quick_look.png -------------------------------------------------------------------------------- /docs/thumbnails.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/docs/thumbnails.png -------------------------------------------------------------------------------- /docs/tint_colors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/docs/tint_colors.png -------------------------------------------------------------------------------- /tests/files/cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/tests/files/cluster.png 
-------------------------------------------------------------------------------- /tests/files/snakes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/tests/files/snakes.pdf -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | requirements.txt linguist-generated=true 2 | dev_requirements.txt linguist-generated=true 3 | -------------------------------------------------------------------------------- /src/docstore/static/stacks.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/src/docstore/static/stacks.ico -------------------------------------------------------------------------------- /tests/files/Newtons_cradle.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/tests/files/Newtons_cradle.gif -------------------------------------------------------------------------------- /tests/files/cluster_segment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/tests/files/cluster_segment.png -------------------------------------------------------------------------------- /src/docstore/static/specktre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/src/docstore/static/specktre.png -------------------------------------------------------------------------------- /src/docstore/static/natural_paper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/src/docstore/static/natural_paper.png 
-------------------------------------------------------------------------------- /tests/files/Rotating_earth_(large).gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/tests/files/Rotating_earth_(large).gif -------------------------------------------------------------------------------- /src/docstore/static/generic_document.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/src/docstore/static/generic_document.png -------------------------------------------------------------------------------- /tests/files/Rotating_earth_(large)_singleframe.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexwlchan/docstore/main/tests/files/Rotating_earth_(large)_singleframe.gif -------------------------------------------------------------------------------- /tests/test_tag_list.py: -------------------------------------------------------------------------------- 1 | from docstore.tag_list import render_tag_list 2 | 3 | 4 | def test_empty_render_tag_list() -> None: 5 | assert render_tag_list({}) == [] 6 | -------------------------------------------------------------------------------- /tests/stubs/wcag_contrast_ratio.pyi: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | Color: typing.TypeAlias = tuple[float, float, float] 4 | 5 | def rgb(color1: Color, color2: Color) -> float: ... 6 | -------------------------------------------------------------------------------- /dev_requirements.in: -------------------------------------------------------------------------------- 1 | -e file:. 
2 | -r requirements.txt 3 | 4 | bs4 5 | pytest 6 | pytest-cov 7 | coverage 8 | mypy 9 | ruff 10 | types-beautifulsoup4 11 | types-tqdm 12 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | attrs>=20.2.0 2 | cattrs>=1.1.1 3 | click>=7.1.2 4 | hyperlink>=21.0.0 5 | Flask>=1.1.2 6 | Pillow 7 | rapidfuzz 8 | smartypants>=2.0.1 9 | Unidecode>=1.1.1 10 | wcag_contrast_ratio>=0.9 11 | -------------------------------------------------------------------------------- /tests/files/credits.txt: -------------------------------------------------------------------------------- 1 | Newtons_cradle.gif 2 | https://en.wikipedia.org/wiki/File:Newtons_cradle_animation_book_2.gif 3 | 4 | Rotating_earth_(large).gif 5 | https://en.wikipedia.org/wiki/File:Rotating_earth_(large).gif 6 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | 4 | import pytest 5 | 6 | 7 | @pytest.fixture 8 | def root(tmpdir: pathlib.Path) -> pathlib.Path: 9 | os.makedirs(str(tmpdir / "root")) 10 | return pathlib.Path(str(tmpdir / "root")) 11 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | time: "09:00" 8 | - package-ecosystem: "pip" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | time: "09:00" 13 | -------------------------------------------------------------------------------- /src/docstore/templates/_title_attribution.html: -------------------------------------------------------------------------------- 1 | {%- if 
doc|tags_with_prefix(prefix + ":") -%} 2 | , {{ prefix }} 3 | {% for t in doc|tags_with_prefix(prefix + ":") -%} 4 | {%- if t not in request_tags %}{% endif -%} 5 | {{ t | replace(prefix + ":", "") }} 6 | {%- if t not in request_tags -%}{% endif %} 7 | {%- if not loop.last -%}, {% endif -%} 8 | {%- endfor -%} 9 | {% endif %} 10 | -------------------------------------------------------------------------------- /src/docstore/git.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import os 3 | import subprocess 4 | 5 | 6 | @functools.lru_cache() 7 | def current_commit() -> str: 8 | """ 9 | Returns the commit of the current docstore version. 10 | """ 11 | return ( 12 | subprocess.check_output( 13 | ["git", "rev-parse", "HEAD"], cwd=os.path.dirname(os.path.abspath(__file__)) 14 | ) 15 | .strip() 16 | .decode("utf8")[:7] 17 | ) 18 | -------------------------------------------------------------------------------- /src/docstore/templates/_head.html: -------------------------------------------------------------------------------- 1 |
| Filename | Comments |
|---|---|
| VolcanoPattern.pdf | 10/10 great name. |
| Alex Chan_5312.pdf | Spaces in filenames cause nothing but trouble. |
| Statement.pdf | This is a bank statement with no context. I have dozens of files with identical names, covering different accounts and date ranges. |
| Alexander Chan›Payslip November 2014-2015.PDF | Special characters are annoying. |
| V5C:3 scrappage note.pdf | I have no idea how I created this file. This is the V5C/3 form, so at some point the slash has been converted to a colon – but both the colon and slash are used as path separators on macOS, and are best avoided. |