├── docs ├── _build │ ├── html │ │ ├── _static │ │ │ ├── docsearch_config.js │ │ │ ├── scripts │ │ │ │ ├── furo-extensions.js │ │ │ │ └── furo.js │ │ │ ├── custom.css │ │ │ ├── file.png │ │ │ ├── icon.png │ │ │ ├── plus.png │ │ │ ├── minus.png │ │ │ ├── pyldavis.png │ │ │ ├── simulation.png │ │ │ ├── topicwizard.png │ │ │ ├── icon_w_title.png │ │ │ ├── plate_notation.png │ │ │ ├── btm_plate_notation.png │ │ │ ├── dmm_plate_notation.png │ │ │ ├── icon_w_title_below.png │ │ │ ├── 04221d1c6838f9fdd557.woff │ │ │ ├── 48af7707fe9e6494d6a5.woff │ │ │ ├── 518464e0ec13eabaab5b.woff │ │ │ ├── 9ac5da2442b734abc516.woff │ │ │ ├── d037cb4792991826de7d.woff │ │ │ ├── d56c9111fe2295657c50.woff │ │ │ ├── e2899e588021cfc45d7f.woff │ │ │ ├── f1e2a76794cb86b2aa8e.woff │ │ │ ├── f58c05a5dca66a6c14cf.woff │ │ │ ├── 1b0f4d9f360bf795b840.woff2 │ │ │ ├── 30670a02974f80a898ff.woff2 │ │ │ ├── 3a43b67e5bbdfb3ab0a6.woff2 │ │ │ ├── 7b7c80708f0c7a904863.woff2 │ │ │ ├── 9d02cef1963502c9b2f9.woff2 │ │ │ ├── b009a76ad6afe4ebd301.woff2 │ │ │ ├── c3a23073096fe509111c.woff2 │ │ │ ├── e10742dbb1d4a0864ba8.woff2 │ │ │ ├── f25d774ecfe0996f8eb5.woff2 │ │ │ ├── icon_w_title_below_dark.png │ │ │ ├── documentation_options.js │ │ │ ├── debug.css │ │ │ ├── manifest.json │ │ │ ├── _sphinx_javascript_frameworks_compat.js │ │ │ ├── doctools.js │ │ │ ├── styles │ │ │ │ ├── furo-extensions.css │ │ │ │ └── furo-extensions.css.map │ │ │ ├── sphinx_highlight.js │ │ │ ├── language_data.js │ │ │ ├── icon_with_text.svg │ │ │ ├── icon.svg │ │ │ ├── icon_with_text_bellow.svg │ │ │ ├── icon_with_text_below_dark.svg │ │ │ └── skeleton.css │ │ ├── objects.inv │ │ ├── _images │ │ │ ├── pyldavis.png │ │ │ ├── simulation.png │ │ │ ├── topicwizard.png │ │ │ ├── icon_w_title.png │ │ │ ├── plate_notation.png │ │ │ ├── btm_plate_notation.png │ │ │ ├── dmm_plate_notation.png │ │ │ └── icon_with_text.svg │ │ ├── .buildinfo │ │ ├── searchindex.js │ │ ├── search.html │ │ └── tweetopic.typing.html │ └── doctrees │ │ ├── index.doctree │ │ ├── environment.pickle │ │ ├── installation.doctree │ │ ├── tweetopic.btm.doctree │ │ ├── tweetopic.dmm.doctree │ │ ├── tweetopic.mgp.doctree │ │ ├── tweetopic.typing.doctree │ │ ├── using_tweetopic.doctree │ │ ├── tweetopic.pipeline.doctree │ │ ├── using_tweetopic.btm.doctree │ │ ├── using_tweetopic.dmm.doctree │ │ ├── using_tweetopic.pipeline.doctree │ │ ├── using_tweetopic.visualization.doctree │ │ └── using_tweetopic.model_persistence.doctree ├── _static │ ├── icon.png │ ├── pyldavis.png │ ├── simulation.png │ ├── topicwizard.png │ ├── icon_w_title.png │ ├── btm_plate_notation.png │ ├── dmm_plate_notation.png │ ├── icon_w_title_below.png │ ├── icon_w_title_below_dark.png │ ├── icon_with_text.svg │ ├── icon.svg │ ├── icon_with_text_bellow.svg │ └── icon_with_text_below_dark.svg ├── installation.rst ├── tweetopic.btm.rst ├── tweetopic.dmm.rst ├── using_tweetopic.model_persistence.rst ├── Makefile ├── index.rst ├── make.bat ├── using_tweetopic.visualization.rst ├── using_tweetopic.pipeline.rst ├── using_tweetopic.btm.rst ├── conf.py └── using_tweetopic.dmm.rst ├── tweetopic ├── exceptions.py ├── __init__.py ├── utils.py ├── func.py ├── _doc.py ├── _prob.py ├── btm.py └── dmm.py ├── .flake8 ├── citation.cff ├── pyproject.toml ├── tests └── test_integration.py ├── .github └── workflows │ ├── test.yml │ └── static.yaml ├── LICENSE ├── .pre-commit-config.yaml ├── README.md ├── mess.py └── .gitignore /docs/_build/html/_static/docsearch_config.js: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /docs/_build/html/_static/scripts/furo-extensions.js: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tweetopic/exceptions.py: -------------------------------------------------------------------------------- 1 | class NotFittedException(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /docs/_build/html/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* This file intentionally left blank. */ 2 | -------------------------------------------------------------------------------- /docs/_static/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_static/icon.png -------------------------------------------------------------------------------- /docs/_static/pyldavis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_static/pyldavis.png -------------------------------------------------------------------------------- /docs/_build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/objects.inv -------------------------------------------------------------------------------- /docs/_static/simulation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_static/simulation.png -------------------------------------------------------------------------------- /docs/_static/topicwizard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_static/topicwizard.png -------------------------------------------------------------------------------- /docs/_static/icon_w_title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_static/icon_w_title.png -------------------------------------------------------------------------------- /docs/_build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/file.png -------------------------------------------------------------------------------- /docs/_build/html/_static/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/icon.png -------------------------------------------------------------------------------- /docs/_build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/plus.png -------------------------------------------------------------------------------- 
/docs/_build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/_build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/minus.png -------------------------------------------------------------------------------- /docs/_static/btm_plate_notation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_static/btm_plate_notation.png -------------------------------------------------------------------------------- /docs/_static/dmm_plate_notation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_static/dmm_plate_notation.png -------------------------------------------------------------------------------- /docs/_static/icon_w_title_below.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_static/icon_w_title_below.png -------------------------------------------------------------------------------- /docs/_build/html/_images/pyldavis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_images/pyldavis.png -------------------------------------------------------------------------------- /docs/_build/html/_static/pyldavis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/pyldavis.png -------------------------------------------------------------------------------- /tweetopic/__init__.py: -------------------------------------------------------------------------------- 1 | from tweetopic.btm import BTM # noqa: F401 2 | from tweetopic.dmm import DMM # noqa: F401 3 | 4 | __all__ = ["BTM", "DMM"] 5 | -------------------------------------------------------------------------------- /docs/_build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/_build/html/_images/simulation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_images/simulation.png -------------------------------------------------------------------------------- /docs/_build/html/_images/topicwizard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_images/topicwizard.png -------------------------------------------------------------------------------- 
/docs/_build/html/_static/simulation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/simulation.png -------------------------------------------------------------------------------- /docs/_build/html/_static/topicwizard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/topicwizard.png -------------------------------------------------------------------------------- /docs/_static/icon_w_title_below_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_static/icon_w_title_below_dark.png -------------------------------------------------------------------------------- /docs/_build/doctrees/installation.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/installation.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/tweetopic.btm.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/tweetopic.btm.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/tweetopic.dmm.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/tweetopic.dmm.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/tweetopic.mgp.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/tweetopic.mgp.doctree -------------------------------------------------------------------------------- /docs/_build/html/_images/icon_w_title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_images/icon_w_title.png -------------------------------------------------------------------------------- /docs/_build/html/_images/plate_notation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_images/plate_notation.png -------------------------------------------------------------------------------- /docs/_build/html/_static/icon_w_title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/icon_w_title.png -------------------------------------------------------------------------------- /docs/_build/html/_static/plate_notation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/plate_notation.png 
-------------------------------------------------------------------------------- /docs/_build/doctrees/tweetopic.typing.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/tweetopic.typing.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/using_tweetopic.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/using_tweetopic.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/tweetopic.pipeline.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/tweetopic.pipeline.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/using_tweetopic.btm.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/using_tweetopic.btm.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/using_tweetopic.dmm.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/using_tweetopic.dmm.doctree -------------------------------------------------------------------------------- /docs/_build/html/_images/btm_plate_notation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_images/btm_plate_notation.png -------------------------------------------------------------------------------- /docs/_build/html/_images/dmm_plate_notation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_images/dmm_plate_notation.png -------------------------------------------------------------------------------- /docs/_build/html/_static/btm_plate_notation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/btm_plate_notation.png -------------------------------------------------------------------------------- /docs/_build/html/_static/dmm_plate_notation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/dmm_plate_notation.png -------------------------------------------------------------------------------- /docs/_build/html/_static/icon_w_title_below.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/icon_w_title_below.png -------------------------------------------------------------------------------- /docs/_build/html/_static/04221d1c6838f9fdd557.woff: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/04221d1c6838f9fdd557.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/48af7707fe9e6494d6a5.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/48af7707fe9e6494d6a5.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/518464e0ec13eabaab5b.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/518464e0ec13eabaab5b.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/9ac5da2442b734abc516.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/9ac5da2442b734abc516.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/d037cb4792991826de7d.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/d037cb4792991826de7d.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/d56c9111fe2295657c50.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/d56c9111fe2295657c50.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/e2899e588021cfc45d7f.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/e2899e588021cfc45d7f.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/f1e2a76794cb86b2aa8e.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/f1e2a76794cb86b2aa8e.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/f58c05a5dca66a6c14cf.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/f58c05a5dca66a6c14cf.woff -------------------------------------------------------------------------------- /docs/_build/doctrees/using_tweetopic.pipeline.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/using_tweetopic.pipeline.doctree -------------------------------------------------------------------------------- /docs/_build/html/_static/1b0f4d9f360bf795b840.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/1b0f4d9f360bf795b840.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/30670a02974f80a898ff.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/30670a02974f80a898ff.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/3a43b67e5bbdfb3ab0a6.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/3a43b67e5bbdfb3ab0a6.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/7b7c80708f0c7a904863.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/7b7c80708f0c7a904863.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/9d02cef1963502c9b2f9.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/9d02cef1963502c9b2f9.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/b009a76ad6afe4ebd301.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/b009a76ad6afe4ebd301.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/c3a23073096fe509111c.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/c3a23073096fe509111c.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/e10742dbb1d4a0864ba8.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/e10742dbb1d4a0864ba8.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/f25d774ecfe0996f8eb5.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/f25d774ecfe0996f8eb5.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/icon_w_title_below_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/html/_static/icon_w_title_below_dark.png -------------------------------------------------------------------------------- /docs/_build/doctrees/using_tweetopic.visualization.doctree: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/using_tweetopic.visualization.doctree -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ================== 3 | tweetopic can be installed from PyPI: 4 | 5 | .. code-block:: bash 6 | 7 | pip install tweetopic 8 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203, E266, E501, E731, W503 3 | max-line-length = 88 4 | max-complexity = 10 5 | select = B,C,E,F,W,T4,B9 6 | exclude = 7 | .env, 8 | .git, 9 | __pycache__, -------------------------------------------------------------------------------- /docs/tweetopic.btm.rst: -------------------------------------------------------------------------------- 1 | .. _tweetopic btm: 2 | 3 | BTM 4 | ======================= 5 | 6 | :ref:`Usage guide <usage btm>` 7 | 8 | 9 | .. autoclass:: tweetopic.btm.BTM 10 | :members: -------------------------------------------------------------------------------- /docs/tweetopic.dmm.rst: -------------------------------------------------------------------------------- 1 | .. _tweetopic dmm: 2 | 3 | DMM 4 | ======================= 5 | 6 | :ref:`Usage guide <usage dmm>` 7 | 8 | 9 | .. autoclass:: tweetopic.dmm.DMM 10 | :members: -------------------------------------------------------------------------------- /docs/_build/doctrees/using_tweetopic.model_persistence.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/centre-for-humanities-computing/tweetopic/HEAD/docs/_build/doctrees/using_tweetopic.model_persistence.doctree -------------------------------------------------------------------------------- /tweetopic/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import njit 3 | 4 | 5 | @njit 6 | def _jitted_seed(seed: int): 7 | # Calling np.random.seed inside an @njit function seeds numba's own RNG, 8 | # which is separate from NumPy's interpreter-level RNG state. 9 | np.random.seed(seed) 10 | 11 | 12 | def set_numba_seed(seed: int): 13 | """Seed both NumPy's global RNG and numba's compiled-code RNG.""" 14 | np.random.seed(seed) 15 | _jitted_seed(seed) 16 | -------------------------------------------------------------------------------- /docs/_build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: d06ea895fc513754a48714e283df0229 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /citation.cff: -------------------------------------------------------------------------------- 1 | 2 | cff-version: 1.2.0 3 | message: "When using this package please cite us." 4 | authors: 5 | - family-names: "Kardos" 6 | given-names: "Márton" 7 | orcid: "https://orcid.org/0000-0001-9652-4498" 8 | title: "tweetopic: Blazing fast topic modelling for short texts."
9 | version: 0.3.0 10 | date-released: 2022-09-21 11 | url: "https://github.com/centre-for-humanities-computing/tweetopic" 12 | -------------------------------------------------------------------------------- /tweetopic/func.py: -------------------------------------------------------------------------------- 1 | """Utility functions for use in the library.""" 2 | from functools import wraps 3 | from typing import Callable 4 | 5 | 6 | def spread(fn: Callable): 7 | """Wraps the given function so that it takes a single dict (PyTree) 8 | and spreads its entries as keyword arguments.""" 9 | 10 | @wraps(fn) 11 | def inner(kwargs): 12 | return fn(**kwargs) 13 | 14 | return inner 15 | -------------------------------------------------------------------------------- /docs/_build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '', 4 | LANGUAGE: 'en', 5 | COLLAPSE_INDEX: false, 6 | BUILDER: 'html', 7 | FILE_SUFFIX: '.html', 8 | LINK_SUFFIX: '.html', 9 | HAS_SOURCE: true, 10 | SOURCELINK_SUFFIX: '.txt', 11 | NAVIGATION_WITH_KEYS: false, 12 | SHOW_SEARCH_SUMMARY: true, 13 | ENABLE_SEARCH_SHORTCUTS: true, 14 | }; -------------------------------------------------------------------------------- /docs/using_tweetopic.model_persistence.rst: -------------------------------------------------------------------------------- 1 | Model persistence 2 | ================= 3 | 4 | For model persistence we suggest you use joblib, 5 | since it stores numpy arrays much more efficiently than pickle: 6 | 7 | .. code-block:: python 8 | 9 | from joblib import dump, load 10 | dump(dmm, "dmm_model.joblib") 11 | 12 | You may load the model as follows: 13 | 14 | .. code-block:: python 15 | 16 | dmm = load("dmm_model.joblib") 17 | 18 | .. note:: 19 | For a comprehensive overview of limitations, 20 | consult `Sklearn's documentation <https://scikit-learn.org/stable/model_persistence.html>`_ -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length=79 3 | 4 | [tool.poetry] 5 | name = "tweetopic" 6 | version = "0.4.0" 7 | description = "Topic modelling over short texts" 8 | authors = ["Márton Kardos "] 9 | license = "MIT" 10 | readme = "README.md" 11 | 12 | [tool.poetry.dependencies] 13 | python = ">=3.8.0" 14 | numpy = ">=1.22,<1.27.0" 15 | numba = ">=0.58.1" 16 | scikit-learn = ">=1.1.1,<1.4.0" 17 | joblib = ">=1.1.0" 18 | deprecated = ">=1.2.0" 19 | tqdm = ">=4.64.0" 20 | 21 | 22 | [build-system] 23 | requires = ["poetry-core"] 24 | build-backend = "poetry.core.masonry.api" 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option.
$(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from sklearn.datasets import fetch_20newsgroups 3 | from sklearn.feature_extraction.text import CountVectorizer 4 | from sklearn.pipeline import make_pipeline 5 | 6 | from tweetopic import BTM, DMM 7 | 8 | newsgroups = fetch_20newsgroups( 9 | subset="all", 10 | categories=[ 11 | "misc.forsale", 12 | ], 13 | remove=("headers", "footers", "quotes"), 14 | ) 15 | texts = newsgroups.data 16 | 17 | models = [DMM(10), BTM(10)] 18 | 19 | 20 | @pytest.mark.parametrize("model", models) 21 | def test_fit(model): 22 | pipe = make_pipeline(CountVectorizer(), model) 23 | doc_topic_matrix = pipe.fit_transform(texts) 24 | assert doc_topic_matrix.shape[0] == len(texts) 25 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | tweetopic 3 | ===================================== 4 | 5 | tweetopic is a Python library for blazing fast and convenient topic 6 | modelling for Tweets and other short texts. 7 | 8 | This website contains the API reference for tweetopic as well as 9 | a usage guide for getting started with tweetopic. 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | :caption: Getting started 14 | 15 | installation 16 | 17 | .. toctree:: 18 | :maxdepth: 1 19 | :caption: Usage 20 | 21 | using_tweetopic.dmm 22 | using_tweetopic.btm 23 | using_tweetopic.pipeline 24 | using_tweetopic.visualization 25 | using_tweetopic.model_persistence 26 | 27 | .. toctree:: 28 | :maxdepth: 1 29 | :caption: API reference 30 | 31 | tweetopic.dmm 32 | tweetopic.btm 33 | 34 | .. toctree:: 35 | 36 | GitHub Repository <https://github.com/centre-for-humanities-computing/tweetopic> 37 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /tweetopic/_doc.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import numpy as np 4 | import scipy.sparse as spr 5 | 6 | 7 | def init_doc_words( 8 | doc_term_matrix: spr.lil_matrix, 9 | max_unique_words: int, 10 | ) -> Tuple[np.ndarray, np.ndarray]: 11 | """Collects, for each document, the indices of its unique words and 12 | their counts from a sparse document-term matrix, zero-padded up to 13 | max_unique_words columns.""" 14 | n_docs, _ = doc_term_matrix.shape 15 | doc_unique_words = np.zeros((n_docs, max_unique_words)).astype(np.uint32) 16 | doc_unique_word_counts = np.zeros((n_docs, max_unique_words)).astype( 17 | np.uint32 18 | ) 19 | for i_doc in range(n_docs): 20 | unique_words = doc_term_matrix[i_doc].rows[0] # type: ignore 21 | unique_word_counts = doc_term_matrix[i_doc].data[0] # type: ignore 22 | for i_unique in range(len(unique_words)): 23 | doc_unique_words[i_doc, i_unique] = unique_words[i_unique] 24 | doc_unique_word_counts[i_doc, i_unique] = unique_word_counts[ 25 | i_unique 26 | ] 27 | return doc_unique_words, doc_unique_word_counts 28 | -------------------------------------------------------------------------------- /docs/using_tweetopic.visualization.rst: -------------------------------------------------------------------------------- 1 | .. _usage visualization: 2 | 3 | Visualization 4 | ============= 5 | 6 | For visualizing your topic models we recommend that you use `topicwizard <https://github.com/x-tabdeveloping/topic-wizard>`_, 7 | which natively works with all sklearn-compatible topic models. 8 | 9 | Install topicwizard from PyPI: 10 | 11 | .. code-block:: bash 12 | 13 | pip install topic-wizard 14 | 15 | 16 | topicwizard can then visualize either your :ref:`Pipeline <usage pipeline>` or your individual model components. 17 | 18 | .. code-block:: python 19 | 20 | import topicwizard 21 | 22 | # Passing the whole pipeline 23 | topicwizard.visualize(pipeline=pipeline, corpus=texts) 24 | 25 | # Passing the individual components 26 | topicwizard.visualize(vectorizer=vectorizer, topic_model=dmm, corpus=texts) 27 | 28 | 29 | .. image:: _static/topicwizard.png 30 | :width: 800 31 | :alt: Topic visualization with topicwizard 32 | 33 | For more information consult the documentation in the `topicwizard repository <https://github.com/x-tabdeveloping/topic-wizard>`_.
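34 | 35 | If you only need a quick textual overview rather than the interactive dashboard, the fitted components can also be inspected directly. The snippet below is a minimal sketch; it assumes the ``vectorizer`` and fitted ``dmm`` from the :ref:`pipeline guide <usage pipeline>` and an sklearn-style ``components_`` attribute on the model: 36 | 37 | .. code-block:: python 38 | 39 | import numpy as np 40 | 41 | # Print the ten highest-weighted terms of every topic. 42 | terms = vectorizer.get_feature_names_out() 43 | for i_topic, weights in enumerate(dmm.components_): 44 | top_terms = terms[np.argsort(weights)[::-1][:10]] 45 | print(f"Topic {i_topic}:", ", ".join(top_terms)) 46 | 47 | 48 | 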
49 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | push: 4 | branches: [main] 5 | pull_request: 6 | branches: [main] 7 | 8 | jobs: 9 | pytest: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: ["3.9", "3.12"] 14 | 15 | # This allows a subsequently queued workflow run to interrupt previous runs 16 | concurrency: 17 | group: "${{ github.workflow }}-${{ matrix.python-version }} @ ${{ github.ref }}" 18 | cancel-in-progress: true 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | cache: "pip" 27 | # You can test your matrix by printing the current Python version 28 | - name: Display Python version 29 | run: python3 -c "import sys; print(sys.version)" 30 | 31 | - name: Install dependencies 32 | run: python3 -m pip install --upgrade tweetopic pandas pytest 33 | 34 | - name: Run tests 35 | run: python3 -m pytest tests/ 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Center for Humanities Computing Aarhus 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_stages: [commit, push] 2 | 3 | ci: 4 | autofix_commit_msg: | 5 | [pre-commit.ci] auto fixes from pre-commit.com hooks 6 | 7 | for more information, see https://pre-commit.ci 8 | autofix_prs: true 9 | autoupdate_branch: "" 10 | autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate" 11 | autoupdate_schedule: quarterly 12 | skip: [] 13 | submodules: false 14 | 15 | repos: 16 | - repo: https://github.com/pycqa/isort 17 | rev: 5.10.1 18 | hooks: 19 | - id: isort 20 | name: isort (python) 21 | args: ["--profile", "black", "--filter-files", "--skip __init__.py"] 22 | 23 | - repo: https://github.com/asottile/add-trailing-comma 24 | rev: v2.2.3 25 | hooks: 26 | - id: add-trailing-comma 27 | 28 | - repo: https://github.com/myint/docformatter 29 | rev: v1.3.1 30 | hooks: 31 | - id: docformatter 32 | args: [--in-place] 33 | 34 | - repo: https://github.com/psf/black 35 | rev: 22.3.0 36 | hooks: 37 | - id: black 38 | 39 | - repo: https://github.com/PyCQA/flake8 40 | rev: 4.0.1 41 | hooks: 42 | - id: flake8 43 | args: [--config, .flake8] -------------------------------------------------------------------------------- /.github/workflows/static.yaml: -------------------------------------------------------------------------------- 1 | # Simple workflow for deploying static content to GitHub Pages 2 | name: Deploy static content to Pages 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["main"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow one concurrent deployment 19 | concurrency: 20 | group: "pages" 21 | cancel-in-progress: true 22 | 23 | jobs: 24 | # Single deploy job since we're just deploying 25 | deploy: 26 | environment: 27 | name: github-pages 28 | url: ${{ steps.deployment.outputs.page_url }} 29 | runs-on: ubuntu-latest 30 | steps: 31 | - name: Checkout 32 | uses: actions/checkout@v3 33 | - name: Setup Pages 34 | uses: actions/configure-pages@v2 35 | - name: Upload artifact 36 | uses: actions/upload-pages-artifact@v1 37 | with: 38 | # Upload entire repository 39 | path: './docs/_build/html' 40 | - name: Deploy to GitHub Pages 41 | id: deployment 42 | uses: actions/deploy-pages@v1 43 | -------------------------------------------------------------------------------- /docs/using_tweetopic.pipeline.rst: -------------------------------------------------------------------------------- 1 | .. _usage pipeline: 2 | 3 | Pipelines 4 | ========= 5 | 6 | To avoid data leakage and make it easier to operate with topic models, we recommend that you use scikit-learn's `Pipeline <https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html>`_. 7 | 8 | Create a vectorizer and topic model: 9 | 10 | .. code-block:: python 11 | 12 | from tweetopic import DMM 13 | from sklearn.feature_extraction.text import CountVectorizer 14 | 15 | vectorizer = CountVectorizer( 16 | stop_words="english", 17 | max_df=0.3, 18 | min_df=15, 19 | ) 20 | dmm = DMM( 21 | n_components=15, 22 | n_iterations=200, 23 | alpha=0.1, 24 | beta=0.2, 25 | ) 26 | 27 | Add the two components to a tweetopic pipeline: 28 | 29 | .. code-block:: python 30 | 31 | from sklearn.pipeline import Pipeline 32 | 33 | pipeline = Pipeline([ 34 | ("vectorizer", vectorizer), 35 | ("dmm", dmm) 36 | ]) 37 | 38 | Fit the pipeline on a stream of texts: 39 | 40 | .. code-block:: python 41 | 42 | pipeline.fit(texts) 43 | 44 | .. note:: 45 | It is highly advisable to pre-process texts with an NLP library 46 | such as `spaCy <https://spacy.io>`_ or `NLTK <https://www.nltk.org>`_. 47 | Removal of stop/function words and lemmatization could drastically improve the quality of topics. 48 | 49 | 50 | -------------------------------------------------------------------------------- /tweetopic/_prob.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import njit 3 | 4 | 5 | @njit(fastmath=True) 6 | def sample_categorical(pvals: np.ndarray) -> int: 7 | """Samples from a categorical distribution given its parameters. 8 | 9 | Parameters 10 | ---------- 11 | pvals: array of shape (n_clusters, ) 12 | Parameters of the categorical distribution. 13 | 14 | Returns 15 | ------- 16 | int 17 | Sample. 18 | """ 19 | # NOTE: This function was needed as numba's implementation 20 | # of numpy's multinomial sampling function has some floating point shenanigans going on. 21 | # Inverse transform sampling with cumulative probabilities :) 22 | cum_prob = 0 23 | u = np.random.uniform(0.0, 1.0) 24 | for i in range(len(pvals)): 25 | cum_prob += pvals[i] 26 | if u < cum_prob: 27 | return i 28 | else: 29 | # This shouldn't ever happen, but floating point errors can 30 | # cause such behaviour every so often. 31 | return 0 32 | 33 | 34 | @njit 35 | def norm_prob(prob: np.ndarray) -> None: 36 | """Normalizes probabilities in place. 37 | 38 | Parameters 39 | ---------- 40 | prob: ndarray 41 | Improper probability distribution. 42 | """ 43 | (n,) = prob.shape 44 | total = np.sum(prob) 45 | if total == 0: 46 | prob[:] = 1 / n 47 | return 48 | for i in range(n): 49 | prob[i] = prob[i] / total 50 | -------------------------------------------------------------------------------- /docs/_build/html/_static/debug.css: -------------------------------------------------------------------------------- 1 | /* 2 | This CSS file should be overridden by the theme authors. It's 3 | meant for debugging and developing the skeleton that this theme provides.
4 | */ 5 | body { 6 | font-family: -apple-system, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, 7 | "Apple Color Emoji", "Segoe UI Emoji"; 8 | background: lavender; 9 | } 10 | .sb-announcement { 11 | background: rgb(131, 131, 131); 12 | } 13 | .sb-announcement__inner { 14 | background: black; 15 | color: white; 16 | } 17 | .sb-header { 18 | background: lightskyblue; 19 | } 20 | .sb-header__inner { 21 | background: royalblue; 22 | color: white; 23 | } 24 | .sb-header-secondary { 25 | background: lightcyan; 26 | } 27 | .sb-header-secondary__inner { 28 | background: cornflowerblue; 29 | color: white; 30 | } 31 | .sb-sidebar-primary { 32 | background: lightgreen; 33 | } 34 | .sb-main { 35 | background: blanchedalmond; 36 | } 37 | .sb-main__inner { 38 | background: antiquewhite; 39 | } 40 | .sb-header-article { 41 | background: lightsteelblue; 42 | } 43 | .sb-article-container { 44 | background: snow; 45 | } 46 | .sb-article-main { 47 | background: white; 48 | } 49 | .sb-footer-article { 50 | background: lightpink; 51 | } 52 | .sb-sidebar-secondary { 53 | background: lightgoldenrodyellow; 54 | } 55 | .sb-footer-content { 56 | background: plum; 57 | } 58 | .sb-footer-content__inner { 59 | background: palevioletred; 60 | } 61 | .sb-footer { 62 | background: pink; 63 | } 64 | .sb-footer__inner { 65 | background: salmon; 66 | } 67 | .sb-article { 68 | background: white; 69 | } 70 | -------------------------------------------------------------------------------- /docs/_build/html/_static/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "_static/theme.css": "_static/theme.e6081a2fb60dd7bf7015.css", 3 | "_static/theme.js": "_static/theme.a9d041328d1560cbc665.js", 4 | "_static/docsearch.css": "_static/docsearch.2899249b16efd7b83b16.css", 5 | "_static/docsearch.js": "_static/docsearch.958a9d9988f644d7ff85.js", 6 | "_static/jetbrains-mono-latin-500-italic.woff": "_static/d56c9111fe2295657c50.woff", 7 | "_static/jetbrains-mono-latin-400-italic.woff": "_static/04221d1c6838f9fdd557.woff", 8 | "_static/jetbrains-mono-latin-600-normal.woff": "_static/518464e0ec13eabaab5b.woff", 9 | "_static/jetbrains-mono-latin-500-normal.woff": "_static/f58c05a5dca66a6c14cf.woff", 10 | "_static/jetbrains-mono-latin-400-normal.woff": "_static/e2899e588021cfc45d7f.woff", 11 | "_static/roboto-latin-500-italic.woff": "_static/9ac5da2442b734abc516.woff", 12 | "_static/roboto-latin-400-italic.woff": "_static/d037cb4792991826de7d.woff", 13 | "_static/jetbrains-mono-latin-500-italic.woff2": "_static/9d02cef1963502c9b2f9.woff2", 14 | "_static/roboto-latin-500-normal.woff": "_static/48af7707fe9e6494d6a5.woff", 15 | "_static/roboto-latin-400-normal.woff": "_static/f1e2a76794cb86b2aa8e.woff", 16 | "_static/jetbrains-mono-latin-400-italic.woff2": "_static/30670a02974f80a898ff.woff2", 17 | "_static/jetbrains-mono-latin-600-normal.woff2": "_static/7b7c80708f0c7a904863.woff2", 18 | "_static/jetbrains-mono-latin-500-normal.woff2": "_static/1b0f4d9f360bf795b840.woff2", 19 | "_static/jetbrains-mono-latin-400-normal.woff2": "_static/c3a23073096fe509111c.woff2", 20 | "_static/roboto-latin-400-italic.woff2": "_static/e10742dbb1d4a0864ba8.woff2", 21 | "_static/roboto-latin-500-italic.woff2": "_static/3a43b67e5bbdfb3ab0a6.woff2", 22 | "_static/roboto-latin-500-normal.woff2": "_static/f25d774ecfe0996f8eb5.woff2", 23 | "_static/roboto-latin-400-normal.woff2": "_static/b009a76ad6afe4ebd301.woff2", 24 | "_static/docsearch_config.js_t": "_static/docsearch_config.js_t" 25 | } 
-------------------------------------------------------------------------------- /docs/using_tweetopic.btm.rst: -------------------------------------------------------------------------------- 1 | .. _usage btm: 2 | 3 | Biterm Topic Model 4 | =================================== 5 | 6 | The `tweetopic.BTM` class provides utilities for fitting and using 7 | Biterm Topic Models. 8 | 9 | The Biterm Topic Model is a generative probabilistic model for topic modelling. 10 | Instead of describing the document generation process, biterm models focus on describing how word co-occurrences are generated from a topic distribution. 11 | This allows them to capture word-to-word relations better in short texts, but unlike DMM, they can also work well for corpora containing longer texts. 12 | 13 | .. image:: _static/btm_plate_notation.png 14 | :width: 400 15 | :alt: Graphical model with plate notation 16 | 17 | *Graphical model of BTM with plate notation (Yan et al. 2013)* 18 | 19 | BTMs in tweetopic are fitted with `Gibbs sampling <https://en.wikipedia.org/wiki/Gibbs_sampling>`_. 20 | Since Gibbs sampling is an iterative `MCMC <https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo>`_ method, increasing the number of iterations 21 | will usually result in better convergence. 22 | 23 | Usage 24 | ^^^^^^^ 25 | (:ref:`API reference <tweetopic btm>`) 26 | 27 | 28 | Creating a model: 29 | 30 | .. code-block:: python 31 | 32 | from tweetopic import BTM 33 | 34 | btm = BTM( 35 | n_components=15, 36 | n_iterations=200, 37 | alpha=6.0, 38 | beta=0.2, 39 | random_state=42, 40 | ) 41 | 42 | Fitting the model on a document-term matrix: 43 | 44 | .. code-block:: python 45 | 46 | btm.fit(doc_term_matrix) 47 | 48 | Predicting cluster labels for unseen documents: 49 | 50 | .. code-block:: python 51 | 52 | btm.transform(new_docs) 53 | 54 | References 55 | ^^^^^^^^^^ 56 | Yan, X., Guo, J., Lan, Y., & Cheng, X. (2013). A Biterm Topic Model for Short Texts. *Proceedings of the 22nd International Conference on World Wide Web, 1445–1456. Presented in Rio de Janeiro, Brazil.* doi:10.1145/2488388.2488514 57 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.insert(0, os.path.abspath("..")) 5 | # Configuration file for the Sphinx documentation builder.
6 | # 7 | # For the full list of built-in configuration values, see the documentation: 8 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 9 | 10 | # -- Project information ----------------------------------------------------- 11 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 12 | 13 | project = "tweetopic" 14 | copyright = "2022, Márton Kardos" 15 | author = "Márton Kardos" 16 | 17 | # -- General configuration --------------------------------------------------- 18 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 19 | 20 | extensions = [ 21 | "sphinx.ext.autodoc", 22 | "sphinx.ext.coverage", 23 | "sphinx.ext.napoleon", 24 | ] 25 | 26 | templates_path = ["_templates"] 27 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 28 | 29 | 30 | # -- Options for HTML output ------------------------------------------------- 31 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 32 | 33 | html_theme = "furo" 34 | # html_collapsible_definitions = True 35 | # html_awesome_code_headers = True 36 | html_favicon = "_static/icon.svg" 37 | html_logo = "_static/icon.svg" 38 | html_title = " " 39 | html_static_path = ["_static"] 40 | html_theme_options = { 41 | "sidebar_hide_name": False, 42 | "light_css_variables": { 43 | "color-api-name": "#28a4df", 44 | "color-api-pre-name": "#ffa671", 45 | }, 46 | "dark_css_variables": { 47 | "color-api-name": "#28a4df", 48 | "color-api-pre-name": "#ffa671", 49 | }, 50 | } 51 | 52 | # Napoleon settings 53 | napoleon_numpy_docstring = True 54 | napoleon_include_init_with_doc = False 55 | napoleon_include_private_with_doc = False 56 | napoleon_include_special_with_doc = True 57 | napoleon_use_admonition_for_examples = False 58 | napoleon_use_admonition_for_notes = False 59 | napoleon_use_admonition_for_references = False 60 | napoleon_use_ivar = False 61 | napoleon_use_param = True 62 | napoleon_use_rtype = True 63 | napoleon_preprocess_types = True 64 | napoleon_type_aliases = None 65 | napoleon_attr_annotations = True 66 | 67 | # Autodoc settings: 68 | autodoc_type_aliases = {"ArrayLike": "ArrayLike"} 69 | autodoc_member_order = "bysource" 70 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # tweetopic 4 | 5 | :zap: Blazing Fast topic modelling over short texts in Python 6 |
7 | 8 | [![PyPI version](https://badge.fury.io/py/tweetopic.svg)](https://pypi.org/project/tweetopic/) 9 | [![pip downloads](https://img.shields.io/pypi/dm/tweetopic.svg)](https://pypi.org/project/tweetopic/) 10 | [![python version](https://img.shields.io/badge/Python-%3E=3.8-blue)](https://github.com/centre-for-humanities-computing/tweetopic) 11 | [![Code style: black](https://img.shields.io/badge/Code%20Style-Black-black)](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html) 12 |
13 | 14 |

15 | 16 |

17 | 18 | 19 | ## Features 20 | 21 | - Fast :zap: 22 | - Scalable :collision: 23 | - High consistency and coherence :dart: 24 | - High quality topics :fire: 25 | - Easy visualization and inspection :eyes: 26 | - Full scikit-learn compatibility :nut_and_bolt: 27 | 28 | #### New in version 0.4.0 ✨ 29 | You can now pass `random_state` to topic models to make your results reproducible. 30 | 31 | ```python 32 | from tweetopic import DMM 33 | 34 | model = DMM(10, random_state=42) 35 | ``` 36 | 37 | ## 🛠 Installation 38 | 39 | Install from PyPI: 40 | 41 | ```bash 42 | pip install tweetopic 43 | ``` 44 | 45 | ## 👩‍💻 Usage ([documentation](https://centre-for-humanities-computing.github.io/tweetopic/)) 46 | 47 | Train your a topic model on a corpus of short texts: 48 | 49 | ```python 50 | from tweetopic import DMM 51 | from sklearn.feature_extraction.text import CountVectorizer 52 | from sklearn.pipeline import Pipeline 53 | 54 | # Creating a vectorizer for extracting document-term matrix from the 55 | # text corpus. 56 | vectorizer = CountVectorizer(min_df=15, max_df=0.1) 57 | 58 | # Creating a Dirichlet Multinomial Mixture Model with 30 components 59 | dmm = DMM(n_components=30, n_iterations=100, alpha=0.1, beta=0.1) 60 | 61 | # Creating topic pipeline 62 | pipeline = Pipeline([ 63 | ("vectorizer", vectorizer), 64 | ("dmm", dmm), 65 | ]) 66 | ``` 67 | 68 | You may fit the model with a stream of short texts: 69 | 70 | ```python 71 | pipeline.fit(texts) 72 | ``` 73 | 74 | To investigate internal structure of topics and their relations to words and indicidual documents we recommend using [topicwizard](https://github.com/x-tabdeveloping/topic-wizard). 75 | 76 | Install it from PyPI: 77 | 78 | ```bash 79 | pip install topic-wizard 80 | ``` 81 | 82 | Then visualize your topic model: 83 | 84 | ```python 85 | import topicwizard 86 | 87 | topicwizard.visualize(pipeline=pipeline, corpus=texts) 88 | ``` 89 | 90 | ![topicwizard visualization](docs/_static/topicwizard.png) 91 | 92 | ## 🎓 References 93 | 94 | - Yin, J., & Wang, J. (2014). A Dirichlet Multinomial Mixture Model-Based Approach for Short Text Clustering. _In Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (pp. 233–242). Association for Computing Machinery._ 95 | -------------------------------------------------------------------------------- /docs/using_tweetopic.dmm.rst: -------------------------------------------------------------------------------- 1 | .. _usage dmm: 2 | 3 | Dirichlet Multinomial Mixture Model 4 | =================================== 5 | 6 | The `tweetopic.DMM` class provides utilities for fitting and using 7 | Dirichlet Multinomial Mixture Models. 8 | 9 | .. image:: _static/simulation.png 10 | :width: 800 11 | :alt: Observations drawn from a DMM 12 | 13 | *Simulation of 1000 observations drawn from a DMM* 14 | `source: Wikipedia `_ 15 | 16 | 17 | The Dirichlet Multinomial Mixture Model or DMM is a generative probabilistic model, 18 | that assumes that all data points in the population are generated from 19 | a mixture of dirichlet distributions with unknown parameters. 20 | 21 | DMMs can be thought of as a fuzzy clustering method, but can also be employed as topic models. 22 | It has been demonstrated that DMMs work particularly well for topic modelling over short texts (Yin and Wang, 2014). 23 | 24 | .. 
87 | To investigate the internal structure of topics, and their relations to words and individual documents, we recommend using [topicwizard](https://github.com/x-tabdeveloping/topic-wizard).
88 | 
89 | Install it from PyPI:
90 | 
91 | ```bash
92 | pip install topic-wizard
93 | ```
94 | 
95 | Then visualize your topic model:
96 | 
97 | ```python
98 | import topicwizard
99 | 
100 | topicwizard.visualize(pipeline=pipeline, corpus=texts)
101 | ```
102 | 
103 | ![topicwizard visualization](docs/_static/topicwizard.png)
104 | 
105 | ## 🎓 References
106 | 
107 | - Yin, J., & Wang, J. (2014). A Dirichlet Multinomial Mixture Model-Based Approach for Short Text Clustering. _In Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (pp. 233–242). Association for Computing Machinery._
108 | 
-------------------------------------------------------------------------------- /docs/using_tweetopic.dmm.rst: -------------------------------------------------------------------------------- 
1 | .. _usage dmm:
2 | 
3 | Dirichlet Multinomial Mixture Model
4 | ===================================
5 | 
6 | The `tweetopic.DMM` class provides utilities for fitting and using
7 | Dirichlet Multinomial Mixture Models.
8 | 
9 | .. image:: _static/simulation.png
10 |    :width: 800
11 |    :alt: Observations drawn from a DMM
12 | 
13 | *Simulation of 1000 observations drawn from a DMM*
14 | `source: Wikipedia `_
15 | 
16 | 
17 | The Dirichlet Multinomial Mixture Model (DMM) is a generative probabilistic model
18 | that assumes each document is generated from one component of a mixture of
19 | multinomial distributions, with Dirichlet priors over the unknown parameters.
20 | 
21 | DMMs can be thought of as a fuzzy clustering method, but they can also be employed as topic models.
22 | It has been demonstrated that DMMs work particularly well for topic modelling over short texts (Yin and Wang, 2014).
23 | 
24 | .. image:: _static/dmm_plate_notation.png
25 |    :width: 400
26 |    :alt: Graphical model with plate notation
27 | 
28 | *Graphical model of DMM with plate notation (Yin & Wang, 2014)*
29 | 
30 | Dirichlet Multinomial Mixtures in tweetopic are fitted with `Gibbs sampling <https://en.wikipedia.org/wiki/Gibbs_sampling>`_.
31 | Since Gibbs sampling is an iterative `MCMC <https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo>`_ method, increasing the number of iterations
32 | will usually result in better convergence.
33 | 
34 | | The parameters *alpha* and *beta* can be explained in terms of the *Movie Group Process*, an analogy used for the fitting algorithm in Yin and Wang (2014).
35 | | The fitting procedure can be thought of as a process where students in a classroom have to divide themselves up into groups based on their movie preferences.
36 | | Each student expresses their preferences in the form of a list of favorite movies (analogous to documents being lists of words).
37 | | In each iteration the students will try to choose a table (cluster/component) based on these two rules:
38 | 
39 | - Rule 1: Prefer tables with more students.
40 | - Rule 2: Choose a table where students have similar preferences to your own.
41 | 
42 | | Here *alpha* represents the willingness of a student to join tables with fewer students,
43 | | while *beta* represents the willingness of students to join tables with preferences differing from their own.
44 | 
45 | Usage
46 | ^^^^^^^
47 | (:ref:`API reference `)
48 | 
49 | 
50 | Creating a model:
51 | 
52 | .. code-block:: python
53 | 
54 |    from tweetopic import DMM
55 | 
56 |    dmm = DMM(
57 |        n_components=15,
58 |        n_iterations=200,
59 |        alpha=0.1,
60 |        beta=0.2,
61 |        random_state=42,
62 |    )
63 | 
64 | Fitting the model on a document-term matrix:
65 | 
66 | .. code-block:: python
67 | 
68 |    dmm.fit(doc_term_matrix)
69 | 
70 | Predicting cluster labels for unseen documents:
71 | 
72 | .. code-block:: python
73 | 
74 |    dmm.transform(new_docs)
75 | 
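76 | ``transform`` returns a matrix of topic probabilities, so hard cluster labels
77 | can be obtained by taking the most probable component for each document.
78 | A minimal sketch building on the objects above (plain NumPy; nothing
79 | tweetopic-specific is assumed):
80 | 
81 | .. code-block:: python
82 | 
83 |    import numpy as np
84 | 
85 |    doc_topic = dmm.transform(new_docs)
86 |    cluster_labels = np.argmax(doc_topic, axis=1)
87 | 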
88 | References
89 | ^^^^^^^^^^
90 | `Yin, J., & Wang, J. (2014). A Dirichlet Multinomial Mixture Model-Based Approach for Short Text Clustering. `_ *In Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (pp. 233–242). Association for Computing Machinery.*
91 | 
-------------------------------------------------------------------------------- /mess.py: -------------------------------------------------------------------------------- 
1 | # %load_ext autoreload
2 | # %autoreload 2
3 | # %autoindent off
4 | 
5 | import random
6 | from functools import partial
7 | 
8 | import blackjax
9 | import jax
10 | import jax.numpy as jnp
11 | import numpy as np
12 | import plotly.express as px
13 | import scipy.sparse as spr
14 | import scipy.stats as stats
15 | from sklearn.feature_extraction.text import CountVectorizer
16 | from sklearn.metrics import r2_score
17 | from tqdm import trange
18 | 
19 | from tweetopic._doc import init_doc_words
20 | from tweetopic.bayesian.dmm import (BayesianDMM, posterior_predictive,
21 |                                     predict_doc, sparse_multinomial_logpdf,
22 |                                     symmetric_dirichlet_logpdf,
23 |                                     symmetric_dirichlet_multinomial_logpdf)
24 | from tweetopic.bayesian.sampling import batch_data, sample_nuts
25 | from tweetopic.func import spread
26 | 
27 | alpha = 0.2
28 | n_features = 10
29 | n_docs = 1000
30 | 
31 | doc_lengths = np.random.randint(10, 100, size=n_docs)
32 | components = stats.dirichlet.rvs(alpha=np.full(n_features, alpha))
33 | X = np.stack([stats.multinomial.rvs(n, components[0]) for n in doc_lengths])
34 | X = spr.csr_matrix(X)
35 | X = X[X.getnnz(1) > 0]
36 | n_documents, n_features_in_ = X.shape
37 | max_unique_words = np.max(np.diff(X.indptr))
38 | doc_unique_words, doc_unique_word_counts = init_doc_words(
39 |     X.tolil(),
40 |     max_unique_words=max_unique_words,
41 | )
42 | data = dict(
43 |     doc_unique_words=doc_unique_words,
44 |     doc_unique_word_counts=doc_unique_word_counts,
45 | )
46 | 
47 | 
48 | def transform(component):
49 |     component = jnp.square(component)
50 |     component = component / jnp.sum(component)
51 |     return component
52 | 
53 | 
54 | def logprior_fn(params):
55 |     component = transform(params["component"])
56 |     return symmetric_dirichlet_logpdf(component, alpha=alpha)
57 | 
58 | 
59 | def loglikelihood_fn(params, data):
60 |     doc_likelihood = jax.vmap(
61 |         partial(sparse_multinomial_logpdf, component=params["component"])
62 |     )
63 |     return jnp.sum(
64 |         doc_likelihood(
65 |             unique_words=data["doc_unique_words"],
66 |             unique_word_counts=data["doc_unique_word_counts"],
67 |         )
68 |     )
69 | 
70 | 
71 | 
72 | 
73 | logdensity_fn = lambda params: logprior_fn(params) + loglikelihood_fn(
74 |     params, data
75 | )
76 | grad_estimator = blackjax.sgmcmc.gradients.grad_estimator(
77 |     logprior_fn, loglikelihood_fn, data_size=n_documents
78 | )
79 | rng_key = jax.random.PRNGKey(0)
80 | batch_key, warmup_key, sampling_key = jax.random.split(rng_key, 3)
81 | batch_idx = batch_data(batch_key, batch_size=64, data_size=n_documents)
82 | batches = (
83 |     dict(
84 |         doc_unique_words=doc_unique_words[idx],
85 |         doc_unique_word_counts=doc_unique_word_counts[idx],
86 |     )
87 |     for idx in batch_idx
88 | )
89 | position = dict(
90 |     component=jnp.array(
91 |         transform(stats.dirichlet.mean(alpha=np.full(n_features, alpha)))
92 |     )
93 | )
94 | 
95 | samples, states = sample_nuts(position, logdensity_fn)
96 | 
97 | 
98 | rng_key = jax.random.PRNGKey(0)
99 | n_samples = 4000
100 | sghmc = jax.jit(blackjax.sgld(grad_estimator))  # jit the kernel once, outside the loop
101 | states = []
102 | step_size = 1e-8
103 | samples = []
104 | for i in trange(n_samples, desc="Sampling"):
105 |     _, rng_key = jax.random.split(rng_key)
106 |     minibatch = next(batches)
107 |     position = sghmc(rng_key, position, minibatch, step_size)
108 | 
samples.append(position) 109 | 110 | densities = [jax.jit(logdensity_fn)(sample) for sample in samples] 111 | component_trace = jnp.stack([sample["component"] for sample in samples]) 112 | component_trace = jax.vmap(transform)(component_trace) 113 | px.line(component_trace).show() 114 | 115 | for i, density in enumerate(densities): 116 | if np.array(density) != -np.inf: 117 | print(f"{i}: {density}") 118 | 119 | 120 | px.line(densities).show() 121 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # python 2 | __pycache__/ 3 | *.egg-info/ 4 | dist/ 5 | 6 | messing_around.py 7 | 8 | # Notebooks 9 | *.ipynb 10 | /.ipynb_checkpoints/ 11 | 12 | # VScode 13 | /.vscode/ 14 | 15 | # Data files 16 | *.zip 17 | *.csv 18 | *.txt 19 | *.joblib 20 | 21 | # Other 22 | /tweetopic/_old.py 23 | 24 | # Byte-compiled / optimized / DLL files 25 | __pycache__/ 26 | *.py[cod] 27 | *$py.class 28 | 29 | # C extensions 30 | *.so 31 | 32 | # Distribution / packaging 33 | .Python 34 | build/ 35 | develop-eggs/ 36 | dist/ 37 | downloads/ 38 | eggs/ 39 | .eggs/ 40 | lib/ 41 | lib64/ 42 | parts/ 43 | sdist/ 44 | var/ 45 | wheels/ 46 | share/python-wheels/ 47 | *.egg-info/ 48 | .installed.cfg 49 | *.egg 50 | MANIFEST 51 | 52 | # PyInstaller 53 | # Usually these files are written by a python script from a template 54 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 55 | *.manifest 56 | *.spec 57 | 58 | # Installer logs 59 | pip-log.txt 60 | pip-delete-this-directory.txt 61 | 62 | # Unit test / coverage reports 63 | htmlcov/ 64 | .tox/ 65 | .nox/ 66 | .coverage 67 | .coverage.* 68 | .cache 69 | nosetests.xml 70 | coverage.xml 71 | *.cover 72 | *.py,cover 73 | .hypothesis/ 74 | .pytest_cache/ 75 | cover/ 76 | 77 | # Translations 78 | *.mo 79 | *.pot 80 | 81 | # Django stuff: 82 | *.log 83 | local_settings.py 84 | db.sqlite3 85 | db.sqlite3-journal 86 | 87 | # Flask stuff: 88 | instance/ 89 | .webassets-cache 90 | 91 | # Scrapy stuff: 92 | .scrapy 93 | 94 | # Sphinx documentation 95 | # docs/_build/ 96 | 97 | # PyBuilder 98 | .pybuilder/ 99 | target/ 100 | 101 | # Jupyter Notebook 102 | .ipynb_checkpoints 103 | 104 | # IPython 105 | profile_default/ 106 | ipython_config.py 107 | 108 | # pyenv 109 | # For a library or package, you might want to ignore these files since the code is 110 | # intended to run in multiple environments; otherwise, check them in: 111 | # .python-version 112 | 113 | # pipenv 114 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 115 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 116 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 117 | # install all needed dependencies. 118 | #Pipfile.lock 119 | 120 | # poetry 121 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 122 | # This is especially recommended for binary packages to ensure reproducibility, and is more 123 | # commonly ignored for libraries. 124 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 125 | #poetry.lock 126 | 127 | # pdm 128 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
129 | #pdm.lock 130 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 131 | # in version control. 132 | # https://pdm.fming.dev/#use-with-ide 133 | .pdm.toml 134 | 135 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 136 | __pypackages__/ 137 | 138 | # Celery stuff 139 | celerybeat-schedule 140 | celerybeat.pid 141 | 142 | # SageMath parsed files 143 | *.sage.py 144 | 145 | # Environments 146 | .env 147 | .venv 148 | env/ 149 | venv/ 150 | ENV/ 151 | env.bak/ 152 | venv.bak/ 153 | 154 | # Spyder project settings 155 | .spyderproject 156 | .spyproject 157 | 158 | # Rope project settings 159 | .ropeproject 160 | 161 | # mkdocs documentation 162 | /site 163 | 164 | # mypy 165 | .mypy_cache/ 166 | .dmypy.json 167 | dmypy.json 168 | 169 | # Pyre type checker 170 | .pyre/ 171 | 172 | # pytype static type analyzer 173 | .pytype/ 174 | 175 | # Cython debug symbols 176 | cython_debug/ 177 | 178 | # PyCharm 179 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 180 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 181 | # and can be added to the global gitignore or merged into this file. For a more nuclear 182 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 183 | #.idea/ 184 | -------------------------------------------------------------------------------- /docs/_build/html/_static/_sphinx_javascript_frameworks_compat.js: -------------------------------------------------------------------------------- 1 | /* 2 | * _sphinx_javascript_frameworks_compat.js 3 | * ~~~~~~~~~~ 4 | * 5 | * Compatability shim for jQuery and underscores.js. 6 | * 7 | * WILL BE REMOVED IN Sphinx 6.0 8 | * xref RemovedInSphinx60Warning 9 | * 10 | */ 11 | 12 | /** 13 | * select a different prefix for underscore 14 | */ 15 | $u = _.noConflict(); 16 | 17 | 18 | /** 19 | * small helper function to urldecode strings 20 | * 21 | * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL 22 | */ 23 | jQuery.urldecode = function(x) { 24 | if (!x) { 25 | return x 26 | } 27 | return decodeURIComponent(x.replace(/\+/g, ' ')); 28 | }; 29 | 30 | /** 31 | * small helper function to urlencode strings 32 | */ 33 | jQuery.urlencode = encodeURIComponent; 34 | 35 | /** 36 | * This function returns the parsed url parameters of the 37 | * current request. Multiple values per key are supported, 38 | * it will always return arrays of strings for the value parts. 39 | */ 40 | jQuery.getQueryParameters = function(s) { 41 | if (typeof s === 'undefined') 42 | s = document.location.search; 43 | var parts = s.substr(s.indexOf('?') + 1).split('&'); 44 | var result = {}; 45 | for (var i = 0; i < parts.length; i++) { 46 | var tmp = parts[i].split('=', 2); 47 | var key = jQuery.urldecode(tmp[0]); 48 | var value = jQuery.urldecode(tmp[1]); 49 | if (key in result) 50 | result[key].push(value); 51 | else 52 | result[key] = [value]; 53 | } 54 | return result; 55 | }; 56 | 57 | /** 58 | * highlight a given string on a jquery object by wrapping it in 59 | * span elements with the given class name. 
60 | */ 61 | jQuery.fn.highlightText = function(text, className) { 62 | function highlight(node, addItems) { 63 | if (node.nodeType === 3) { 64 | var val = node.nodeValue; 65 | var pos = val.toLowerCase().indexOf(text); 66 | if (pos >= 0 && 67 | !jQuery(node.parentNode).hasClass(className) && 68 | !jQuery(node.parentNode).hasClass("nohighlight")) { 69 | var span; 70 | var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg"); 71 | if (isInSVG) { 72 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); 73 | } else { 74 | span = document.createElement("span"); 75 | span.className = className; 76 | } 77 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); 78 | node.parentNode.insertBefore(span, node.parentNode.insertBefore( 79 | document.createTextNode(val.substr(pos + text.length)), 80 | node.nextSibling)); 81 | node.nodeValue = val.substr(0, pos); 82 | if (isInSVG) { 83 | var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect"); 84 | var bbox = node.parentElement.getBBox(); 85 | rect.x.baseVal.value = bbox.x; 86 | rect.y.baseVal.value = bbox.y; 87 | rect.width.baseVal.value = bbox.width; 88 | rect.height.baseVal.value = bbox.height; 89 | rect.setAttribute('class', className); 90 | addItems.push({ 91 | "parent": node.parentNode, 92 | "target": rect}); 93 | } 94 | } 95 | } 96 | else if (!jQuery(node).is("button, select, textarea")) { 97 | jQuery.each(node.childNodes, function() { 98 | highlight(this, addItems); 99 | }); 100 | } 101 | } 102 | var addItems = []; 103 | var result = this.each(function() { 104 | highlight(this, addItems); 105 | }); 106 | for (var i = 0; i < addItems.length; ++i) { 107 | jQuery(addItems[i].parent).before(addItems[i].target); 108 | } 109 | return result; 110 | }; 111 | 112 | /* 113 | * backward compatibility for jQuery.browser 114 | * This will be supported until firefox bug is fixed. 115 | */ 116 | if (!jQuery.browser) { 117 | jQuery.uaMatch = function(ua) { 118 | ua = ua.toLowerCase(); 119 | 120 | var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || 121 | /(webkit)[ \/]([\w.]+)/.exec(ua) || 122 | /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || 123 | /(msie) ([\w.]+)/.exec(ua) || 124 | ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) || 125 | []; 126 | 127 | return { 128 | browser: match[ 1 ] || "", 129 | version: match[ 2 ] || "0" 130 | }; 131 | }; 132 | jQuery.browser = {}; 133 | jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; 134 | } 135 | -------------------------------------------------------------------------------- /docs/_build/html/_static/scripts/furo.js: -------------------------------------------------------------------------------- 1 | /*! 
For license information please see furo.js.LICENSE.txt */ 2 | (()=>{var t={212:function(t,e,n){var o,r;r=void 0!==n.g?n.g:"undefined"!=typeof window?window:this,o=function(){return function(t){"use strict";var e={navClass:"active",contentClass:"active",nested:!1,nestedClass:"active",offset:0,reflow:!1,events:!0},n=function(t,e,n){if(n.settings.events){var o=new CustomEvent(t,{bubbles:!0,cancelable:!0,detail:n});e.dispatchEvent(o)}},o=function(t){var e=0;if(t.offsetParent)for(;t;)e+=t.offsetTop,t=t.offsetParent;return e>=0?e:0},r=function(t){t&&t.sort((function(t,e){return o(t.content)=Math.max(document.body.scrollHeight,document.documentElement.scrollHeight,document.body.offsetHeight,document.documentElement.offsetHeight,document.body.clientHeight,document.documentElement.clientHeight)},l=function(t,e){var n=t[t.length-1];if(function(t,e){return!(!s()||!c(t.content,e,!0))}(n,e))return n;for(var o=t.length-1;o>=0;o--)if(c(t[o].content,e))return t[o]},a=function(t,e){if(e.nested&&t.parentNode){var n=t.parentNode.closest("li");n&&(n.classList.remove(e.nestedClass),a(n,e))}},i=function(t,e){if(t){var o=t.nav.closest("li");o&&(o.classList.remove(e.navClass),t.content.classList.remove(e.contentClass),a(o,e),n("gumshoeDeactivate",o,{link:t.nav,content:t.content,settings:e}))}},u=function(t,e){if(e.nested){var n=t.parentNode.closest("li");n&&(n.classList.add(e.nestedClass),u(n,e))}};return function(o,c){var s,a,d,f,m,v={setup:function(){s=document.querySelectorAll(o),a=[],Array.prototype.forEach.call(s,(function(t){var e=document.getElementById(decodeURIComponent(t.hash.substr(1)));e&&a.push({nav:t,content:e})})),r(a)},detect:function(){var t=l(a,m);t?d&&t.content===d.content||(i(d,m),function(t,e){if(t){var o=t.nav.closest("li");o&&(o.classList.add(e.navClass),t.content.classList.add(e.contentClass),u(o,e),n("gumshoeActivate",o,{link:t.nav,content:t.content,settings:e}))}}(t,m),d=t):d&&(i(d,m),d=null)}},h=function(e){f&&t.cancelAnimationFrame(f),f=t.requestAnimationFrame(v.detect)},g=function(e){f&&t.cancelAnimationFrame(f),f=t.requestAnimationFrame((function(){r(a),v.detect()}))};return v.destroy=function(){d&&i(d,m),t.removeEventListener("scroll",h,!1),m.reflow&&t.removeEventListener("resize",g,!1),a=null,s=null,d=null,f=null,m=null},m=function(){var t={};return Array.prototype.forEach.call(arguments,(function(e){for(var n in e){if(!e.hasOwnProperty(n))return;t[n]=e[n]}})),t}(e,c||{}),v.setup(),v.detect(),t.addEventListener("scroll",h,!1),m.reflow&&t.addEventListener("resize",g,!1),v}}(r)}.apply(e,[]),void 0===o||(t.exports=o)}},e={};function n(o){var r=e[o];if(void 0!==r)return r.exports;var c=e[o]={exports:{}};return t[o].call(c.exports,c,c.exports,n),c.exports}n.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return n.d(e,{a:e}),e},n.d=(t,e)=>{for(var o in e)n.o(e,o)&&!n.o(t,o)&&Object.defineProperty(t,o,{enumerable:!0,get:e[o]})},n.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(t){if("object"==typeof window)return window}}(),n.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),(()=>{"use strict";var t=n(212),e=n.n(t),o=null,r=null,c=window.pageYOffset||document.documentElement.scrollTop;function s(){const t=localStorage.getItem("theme")||"auto";var e;"light"!==(e=window.matchMedia("(prefers-color-scheme: dark)").matches?"auto"===t?"light":"light"==t?"dark":"auto":"auto"===t?"dark":"dark"==t?"light":"auto")&&"dark"!==e&&"auto"!==e&&(console.error(`Got invalid theme mode: ${e}. 
Resetting to auto.`),e="auto"),document.body.dataset.theme=e,localStorage.setItem("theme",e),console.log(`Changed to ${e} mode.`)}function l(){!function(){const t=document.getElementsByClassName("theme-toggle");Array.from(t).forEach((t=>{t.addEventListener("click",s)}))}(),function(){let t=0,e=!1;window.addEventListener("scroll",(function(n){t=window.scrollY,e||(window.requestAnimationFrame((function(){var n;n=t,0==Math.floor(r.getBoundingClientRect().top)?r.classList.add("scrolled"):r.classList.remove("scrolled"),function(t){t<64?document.documentElement.classList.remove("show-back-to-top"):tc&&document.documentElement.classList.remove("show-back-to-top"),c=t}(n),function(t){null!==o&&(0==t?o.scrollTo(0,0):Math.ceil(t)>=Math.floor(document.documentElement.scrollHeight-window.innerHeight)?o.scrollTo(0,o.scrollHeight):document.querySelector(".scroll-current"))}(n),e=!1})),e=!0)})),window.scroll()}(),null!==o&&new(e())(".toc-tree a",{reflow:!0,recursive:!0,navClass:"scroll-current",offset:()=>{let t=parseFloat(getComputedStyle(document.documentElement).fontSize);return r.getBoundingClientRect().height+.5*t+1}})}document.addEventListener("DOMContentLoaded",(function(){document.body.parentNode.classList.remove("no-js"),r=document.querySelector("header"),o=document.querySelector(".toc-scroll"),l()}))})()})(); 3 | //# sourceMappingURL=furo.js.map -------------------------------------------------------------------------------- /docs/_build/html/_static/doctools.js: -------------------------------------------------------------------------------- 1 | /* 2 | * doctools.js 3 | * ~~~~~~~~~~~ 4 | * 5 | * Base JavaScript utilities for all Sphinx HTML documentation. 6 | * 7 | * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 9 | * 10 | */ 11 | "use strict"; 12 | 13 | const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ 14 | "TEXTAREA", 15 | "INPUT", 16 | "SELECT", 17 | "BUTTON", 18 | ]); 19 | 20 | const _ready = (callback) => { 21 | if (document.readyState !== "loading") { 22 | callback(); 23 | } else { 24 | document.addEventListener("DOMContentLoaded", callback); 25 | } 26 | }; 27 | 28 | /** 29 | * Small JavaScript module for the documentation. 30 | */ 31 | const Documentation = { 32 | init: () => { 33 | Documentation.initDomainIndexTable(); 34 | Documentation.initOnKeyListeners(); 35 | }, 36 | 37 | /** 38 | * i18n support 39 | */ 40 | TRANSLATIONS: {}, 41 | PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), 42 | LOCALE: "unknown", 43 | 44 | // gettext and ngettext don't access this so that the functions 45 | // can safely bound to a different name (_ = Documentation.gettext) 46 | gettext: (string) => { 47 | const translated = Documentation.TRANSLATIONS[string]; 48 | switch (typeof translated) { 49 | case "undefined": 50 | return string; // no translation 51 | case "string": 52 | return translated; // translation exists 53 | default: 54 | return translated[0]; // (singular, plural) translation tuple exists 55 | } 56 | }, 57 | 58 | ngettext: (singular, plural, n) => { 59 | const translated = Documentation.TRANSLATIONS[singular]; 60 | if (typeof translated !== "undefined") 61 | return translated[Documentation.PLURAL_EXPR(n)]; 62 | return n === 1 ? 
singular : plural; 63 | }, 64 | 65 | addTranslations: (catalog) => { 66 | Object.assign(Documentation.TRANSLATIONS, catalog.messages); 67 | Documentation.PLURAL_EXPR = new Function( 68 | "n", 69 | `return (${catalog.plural_expr})` 70 | ); 71 | Documentation.LOCALE = catalog.locale; 72 | }, 73 | 74 | /** 75 | * helper function to focus on search bar 76 | */ 77 | focusSearchBar: () => { 78 | document.querySelectorAll("input[name=q]")[0]?.focus(); 79 | }, 80 | 81 | /** 82 | * Initialise the domain index toggle buttons 83 | */ 84 | initDomainIndexTable: () => { 85 | const toggler = (el) => { 86 | const idNumber = el.id.substr(7); 87 | const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); 88 | if (el.src.substr(-9) === "minus.png") { 89 | el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; 90 | toggledRows.forEach((el) => (el.style.display = "none")); 91 | } else { 92 | el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; 93 | toggledRows.forEach((el) => (el.style.display = "")); 94 | } 95 | }; 96 | 97 | const togglerElements = document.querySelectorAll("img.toggler"); 98 | togglerElements.forEach((el) => 99 | el.addEventListener("click", (event) => toggler(event.currentTarget)) 100 | ); 101 | togglerElements.forEach((el) => (el.style.display = "")); 102 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); 103 | }, 104 | 105 | initOnKeyListeners: () => { 106 | // only install a listener if it is really needed 107 | if ( 108 | !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && 109 | !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS 110 | ) 111 | return; 112 | 113 | document.addEventListener("keydown", (event) => { 114 | // bail for input elements 115 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; 116 | // bail with special keys 117 | if (event.altKey || event.ctrlKey || event.metaKey) return; 118 | 119 | if (!event.shiftKey) { 120 | switch (event.key) { 121 | case "ArrowLeft": 122 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; 123 | 124 | const prevLink = document.querySelector('link[rel="prev"]'); 125 | if (prevLink && prevLink.href) { 126 | window.location.href = prevLink.href; 127 | event.preventDefault(); 128 | } 129 | break; 130 | case "ArrowRight": 131 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; 132 | 133 | const nextLink = document.querySelector('link[rel="next"]'); 134 | if (nextLink && nextLink.href) { 135 | window.location.href = nextLink.href; 136 | event.preventDefault(); 137 | } 138 | break; 139 | } 140 | } 141 | 142 | // some keyboard layouts may need Shift to get / 143 | switch (event.key) { 144 | case "/": 145 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; 146 | Documentation.focusSearchBar(); 147 | event.preventDefault(); 148 | } 149 | }); 150 | }, 151 | }; 152 | 153 | // quick alias for translations 154 | const _ = Documentation.gettext; 155 | 156 | _ready(Documentation.init); 157 | -------------------------------------------------------------------------------- /docs/_build/html/_static/styles/furo-extensions.css: -------------------------------------------------------------------------------- 1 | #furo-sidebar-ad-placement{padding:var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)}#furo-sidebar-ad-placement .ethical-sidebar{background:var(--color-background-secondary);border:none;box-shadow:none}#furo-sidebar-ad-placement .ethical-sidebar:hover{background:var(--color-background-hover)}#furo-sidebar-ad-placement .ethical-sidebar 
a{color:var(--color-foreground-primary)}#furo-sidebar-ad-placement .ethical-callout a{color:var(--color-foreground-secondary)!important}#furo-readthedocs-versions{background:transparent;display:block;position:static;width:100%}#furo-readthedocs-versions .rst-versions{background:#1a1c1e}#furo-readthedocs-versions .rst-current-version{background:var(--color-sidebar-item-background);cursor:unset}#furo-readthedocs-versions .rst-current-version:hover{background:var(--color-sidebar-item-background)}#furo-readthedocs-versions .rst-current-version .fa-book{color:var(--color-foreground-primary)}#furo-readthedocs-versions>.rst-other-versions{padding:0}#furo-readthedocs-versions>.rst-other-versions small{opacity:1}#furo-readthedocs-versions .injected .rst-versions{position:unset}#furo-readthedocs-versions:focus-within,#furo-readthedocs-versions:hover{box-shadow:0 0 0 1px var(--color-sidebar-background-border)}#furo-readthedocs-versions:focus-within .rst-current-version,#furo-readthedocs-versions:hover .rst-current-version{background:#1a1c1e;font-size:inherit;height:auto;line-height:inherit;padding:12px;text-align:right}#furo-readthedocs-versions:focus-within .rst-current-version .fa-book,#furo-readthedocs-versions:hover .rst-current-version .fa-book{color:#fff;float:left}#furo-readthedocs-versions:focus-within .fa-caret-down,#furo-readthedocs-versions:hover .fa-caret-down{display:none}#furo-readthedocs-versions:focus-within .injected,#furo-readthedocs-versions:focus-within .rst-current-version,#furo-readthedocs-versions:focus-within .rst-other-versions,#furo-readthedocs-versions:hover .injected,#furo-readthedocs-versions:hover .rst-current-version,#furo-readthedocs-versions:hover .rst-other-versions{display:block}#furo-readthedocs-versions:focus-within>.rst-current-version,#furo-readthedocs-versions:hover>.rst-current-version{display:none}.highlight:hover button.copybtn{color:var(--color-code-foreground)}.highlight button.copybtn{align-items:center;background-color:var(--color-code-background);border:none;color:var(--color-background-item);cursor:pointer;height:1.25em;opacity:1;right:.5rem;top:.625rem;transition:color .3s,opacity .3s;width:1.25em}.highlight button.copybtn:hover{background-color:var(--color-code-background);color:var(--color-brand-content)}.highlight button.copybtn:after{background-color:transparent;color:var(--color-code-foreground);display:none}.highlight button.copybtn.success{color:#22863a;transition:color 0ms}.highlight button.copybtn.success:after{display:block}.highlight button.copybtn 
svg{padding:0}body{--sd-color-primary:var(--color-brand-primary);--sd-color-primary-highlight:var(--color-brand-content);--sd-color-primary-text:var(--color-background-primary);--sd-color-shadow:rgba(0,0,0,.05);--sd-color-card-border:var(--color-card-border);--sd-color-card-border-hover:var(--color-brand-content);--sd-color-card-background:var(--color-card-background);--sd-color-card-text:var(--color-foreground-primary);--sd-color-card-header:var(--color-card-marginals-background);--sd-color-card-footer:var(--color-card-marginals-background);--sd-color-tabs-label-active:var(--color-brand-content);--sd-color-tabs-label-hover:var(--color-foreground-muted);--sd-color-tabs-label-inactive:var(--color-foreground-muted);--sd-color-tabs-underline-active:var(--color-brand-content);--sd-color-tabs-underline-hover:var(--color-foreground-border);--sd-color-tabs-underline-inactive:var(--color-background-border);--sd-color-tabs-overline:var(--color-background-border);--sd-color-tabs-underline:var(--color-background-border)}.sd-tab-content{box-shadow:0 -2px var(--sd-color-tabs-overline),0 1px var(--sd-color-tabs-underline)}.sd-card{box-shadow:0 .1rem .25rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)}.sd-shadow-sm{box-shadow:0 .1rem .25rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)!important}.sd-shadow-md{box-shadow:0 .3rem .75rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)!important}.sd-shadow-lg{box-shadow:0 .6rem 1.5rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)!important}.sd-card-hover:hover{transform:none}.sd-cards-carousel{gap:.25rem;padding:.25rem}body{--tabs--label-text:var(--color-foreground-muted);--tabs--label-text--hover:var(--color-foreground-muted);--tabs--label-text--active:var(--color-brand-content);--tabs--label-text--active--hover:var(--color-brand-content);--tabs--label-background:transparent;--tabs--label-background--hover:transparent;--tabs--label-background--active:transparent;--tabs--label-background--active--hover:transparent;--tabs--padding-x:0.25em;--tabs--margin-x:1em;--tabs--border:var(--color-background-border);--tabs--label-border:transparent;--tabs--label-border--hover:var(--color-foreground-muted);--tabs--label-border--active:var(--color-brand-content);--tabs--label-border--active--hover:var(--color-brand-content)}[role=main] .container{max-width:none;padding-left:0;padding-right:0}.shadow.docutils{border:none;box-shadow:0 .2rem .5rem rgba(0,0,0,.05),0 0 .0625rem rgba(0,0,0,.1)!important}.sphinx-bs .card{background-color:var(--color-background-secondary);color:var(--color-foreground)} 2 | /*# sourceMappingURL=furo-extensions.css.map*/ -------------------------------------------------------------------------------- /docs/_build/html/_static/sphinx_highlight.js: -------------------------------------------------------------------------------- 1 | /* Highlighting utilities for Sphinx HTML documentation. */ 2 | "use strict"; 3 | 4 | const SPHINX_HIGHLIGHT_ENABLED = true 5 | 6 | /** 7 | * highlight a given string on a node by wrapping it in 8 | * span elements with the given class name. 
9 | */ 10 | const _highlight = (node, addItems, text, className) => { 11 | if (node.nodeType === Node.TEXT_NODE) { 12 | const val = node.nodeValue; 13 | const parent = node.parentNode; 14 | const pos = val.toLowerCase().indexOf(text); 15 | if ( 16 | pos >= 0 && 17 | !parent.classList.contains(className) && 18 | !parent.classList.contains("nohighlight") 19 | ) { 20 | let span; 21 | 22 | const closestNode = parent.closest("body, svg, foreignObject"); 23 | const isInSVG = closestNode && closestNode.matches("svg"); 24 | if (isInSVG) { 25 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); 26 | } else { 27 | span = document.createElement("span"); 28 | span.classList.add(className); 29 | } 30 | 31 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); 32 | parent.insertBefore( 33 | span, 34 | parent.insertBefore( 35 | document.createTextNode(val.substr(pos + text.length)), 36 | node.nextSibling 37 | ) 38 | ); 39 | node.nodeValue = val.substr(0, pos); 40 | 41 | if (isInSVG) { 42 | const rect = document.createElementNS( 43 | "http://www.w3.org/2000/svg", 44 | "rect" 45 | ); 46 | const bbox = parent.getBBox(); 47 | rect.x.baseVal.value = bbox.x; 48 | rect.y.baseVal.value = bbox.y; 49 | rect.width.baseVal.value = bbox.width; 50 | rect.height.baseVal.value = bbox.height; 51 | rect.setAttribute("class", className); 52 | addItems.push({ parent: parent, target: rect }); 53 | } 54 | } 55 | } else if (node.matches && !node.matches("button, select, textarea")) { 56 | node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); 57 | } 58 | }; 59 | const _highlightText = (thisNode, text, className) => { 60 | let addItems = []; 61 | _highlight(thisNode, addItems, text, className); 62 | addItems.forEach((obj) => 63 | obj.parent.insertAdjacentElement("beforebegin", obj.target) 64 | ); 65 | }; 66 | 67 | /** 68 | * Small JavaScript module for the documentation. 69 | */ 70 | const SphinxHighlight = { 71 | 72 | /** 73 | * highlight the search words provided in localstorage in the text 74 | */ 75 | highlightSearchWords: () => { 76 | if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight 77 | 78 | // get and clear terms from localstorage 79 | const url = new URL(window.location); 80 | const highlight = 81 | localStorage.getItem("sphinx_highlight_terms") 82 | || url.searchParams.get("highlight") 83 | || ""; 84 | localStorage.removeItem("sphinx_highlight_terms") 85 | url.searchParams.delete("highlight"); 86 | window.history.replaceState({}, "", url); 87 | 88 | // get individual terms from highlight string 89 | const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); 90 | if (terms.length === 0) return; // nothing to do 91 | 92 | // There should never be more than one element matching "div.body" 93 | const divBody = document.querySelectorAll("div.body"); 94 | const body = divBody.length ? 
divBody[0] : document.querySelector("body"); 95 | window.setTimeout(() => { 96 | terms.forEach((term) => _highlightText(body, term, "highlighted")); 97 | }, 10); 98 | 99 | const searchBox = document.getElementById("searchbox"); 100 | if (searchBox === null) return; 101 | searchBox.appendChild( 102 | document 103 | .createRange() 104 | .createContextualFragment( 105 | '" 109 | ) 110 | ); 111 | }, 112 | 113 | /** 114 | * helper function to hide the search marks again 115 | */ 116 | hideSearchWords: () => { 117 | document 118 | .querySelectorAll("#searchbox .highlight-link") 119 | .forEach((el) => el.remove()); 120 | document 121 | .querySelectorAll("span.highlighted") 122 | .forEach((el) => el.classList.remove("highlighted")); 123 | localStorage.removeItem("sphinx_highlight_terms") 124 | }, 125 | 126 | initEscapeListener: () => { 127 | // only install a listener if it is really needed 128 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; 129 | 130 | document.addEventListener("keydown", (event) => { 131 | // bail for input elements 132 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; 133 | // bail with special keys 134 | if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; 135 | if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { 136 | SphinxHighlight.hideSearchWords(); 137 | event.preventDefault(); 138 | } 139 | }); 140 | }, 141 | }; 142 | 143 | _ready(SphinxHighlight.highlightSearchWords); 144 | _ready(SphinxHighlight.initEscapeListener); 145 | -------------------------------------------------------------------------------- /docs/_build/html/_static/language_data.js: -------------------------------------------------------------------------------- 1 | /* 2 | * language_data.js 3 | * ~~~~~~~~~~~~~~~~ 4 | * 5 | * This script contains the language-specific data used by searchtools.js, 6 | * namely the list of stopwords, stemmer, scorer and splitter. 7 | * 8 | * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. 9 | * :license: BSD, see LICENSE for details. 10 | * 11 | */ 12 | 13 | var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; 14 | 15 | 16 | /* Non-minified version is copied as a separate JS file, is available */ 17 | 18 | /** 19 | * Porter Stemmer 20 | */ 21 | var Stemmer = function() { 22 | 23 | var step2list = { 24 | ational: 'ate', 25 | tional: 'tion', 26 | enci: 'ence', 27 | anci: 'ance', 28 | izer: 'ize', 29 | bli: 'ble', 30 | alli: 'al', 31 | entli: 'ent', 32 | eli: 'e', 33 | ousli: 'ous', 34 | ization: 'ize', 35 | ation: 'ate', 36 | ator: 'ate', 37 | alism: 'al', 38 | iveness: 'ive', 39 | fulness: 'ful', 40 | ousness: 'ous', 41 | aliti: 'al', 42 | iviti: 'ive', 43 | biliti: 'ble', 44 | logi: 'log' 45 | }; 46 | 47 | var step3list = { 48 | icate: 'ic', 49 | ative: '', 50 | alize: 'al', 51 | iciti: 'ic', 52 | ical: 'ic', 53 | ful: '', 54 | ness: '' 55 | }; 56 | 57 | var c = "[^aeiou]"; // consonant 58 | var v = "[aeiouy]"; // vowel 59 | var C = c + "[^aeiouy]*"; // consonant sequence 60 | var V = v + "[aeiou]*"; // vowel sequence 61 | 62 | var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 63 | var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 64 | var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 65 | var s_v = "^(" + C + ")?" 
+ v; // vowel in stem 66 | 67 | this.stemWord = function (w) { 68 | var stem; 69 | var suffix; 70 | var firstch; 71 | var origword = w; 72 | 73 | if (w.length < 3) 74 | return w; 75 | 76 | var re; 77 | var re2; 78 | var re3; 79 | var re4; 80 | 81 | firstch = w.substr(0,1); 82 | if (firstch == "y") 83 | w = firstch.toUpperCase() + w.substr(1); 84 | 85 | // Step 1a 86 | re = /^(.+?)(ss|i)es$/; 87 | re2 = /^(.+?)([^s])s$/; 88 | 89 | if (re.test(w)) 90 | w = w.replace(re,"$1$2"); 91 | else if (re2.test(w)) 92 | w = w.replace(re2,"$1$2"); 93 | 94 | // Step 1b 95 | re = /^(.+?)eed$/; 96 | re2 = /^(.+?)(ed|ing)$/; 97 | if (re.test(w)) { 98 | var fp = re.exec(w); 99 | re = new RegExp(mgr0); 100 | if (re.test(fp[1])) { 101 | re = /.$/; 102 | w = w.replace(re,""); 103 | } 104 | } 105 | else if (re2.test(w)) { 106 | var fp = re2.exec(w); 107 | stem = fp[1]; 108 | re2 = new RegExp(s_v); 109 | if (re2.test(stem)) { 110 | w = stem; 111 | re2 = /(at|bl|iz)$/; 112 | re3 = new RegExp("([^aeiouylsz])\\1$"); 113 | re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 114 | if (re2.test(w)) 115 | w = w + "e"; 116 | else if (re3.test(w)) { 117 | re = /.$/; 118 | w = w.replace(re,""); 119 | } 120 | else if (re4.test(w)) 121 | w = w + "e"; 122 | } 123 | } 124 | 125 | // Step 1c 126 | re = /^(.+?)y$/; 127 | if (re.test(w)) { 128 | var fp = re.exec(w); 129 | stem = fp[1]; 130 | re = new RegExp(s_v); 131 | if (re.test(stem)) 132 | w = stem + "i"; 133 | } 134 | 135 | // Step 2 136 | re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; 137 | if (re.test(w)) { 138 | var fp = re.exec(w); 139 | stem = fp[1]; 140 | suffix = fp[2]; 141 | re = new RegExp(mgr0); 142 | if (re.test(stem)) 143 | w = stem + step2list[suffix]; 144 | } 145 | 146 | // Step 3 147 | re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; 148 | if (re.test(w)) { 149 | var fp = re.exec(w); 150 | stem = fp[1]; 151 | suffix = fp[2]; 152 | re = new RegExp(mgr0); 153 | if (re.test(stem)) 154 | w = stem + step3list[suffix]; 155 | } 156 | 157 | // Step 4 158 | re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; 159 | re2 = /^(.+?)(s|t)(ion)$/; 160 | if (re.test(w)) { 161 | var fp = re.exec(w); 162 | stem = fp[1]; 163 | re = new RegExp(mgr1); 164 | if (re.test(stem)) 165 | w = stem; 166 | } 167 | else if (re2.test(w)) { 168 | var fp = re2.exec(w); 169 | stem = fp[1] + fp[2]; 170 | re2 = new RegExp(mgr1); 171 | if (re2.test(stem)) 172 | w = stem; 173 | } 174 | 175 | // Step 5 176 | re = /^(.+?)e$/; 177 | if (re.test(w)) { 178 | var fp = re.exec(w); 179 | stem = fp[1]; 180 | re = new RegExp(mgr1); 181 | re2 = new RegExp(meq1); 182 | re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 183 | if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) 184 | w = stem; 185 | } 186 | re = /ll$/; 187 | re2 = new RegExp(mgr1); 188 | if (re.test(w) && re2.test(w)) { 189 | re = /.$/; 190 | w = w.replace(re,""); 191 | } 192 | 193 | // and turn initial Y back to y 194 | if (firstch == "y") 195 | w = firstch.toLowerCase() + w.substr(1); 196 | return w; 197 | } 198 | } 199 | 200 | -------------------------------------------------------------------------------- /docs/_static/icon_with_text.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 20 | 40 | 42 | 45 | 49 | 53 | 54 | 57 | 61 | 65 | 66 | 69 | 73 | 77 | 78 | 89 | 100 | 111 | 112 | 116 | 124 | 132 | 140 | tweetopic 151 | 152 | 153 | 
-------------------------------------------------------------------------------- /docs/_build/html/_images/icon_with_text.svg: -------------------------------------------------------------------------------- [SVG: "tweetopic" logo with wordmark; markup stripped in extraction]
-------------------------------------------------------------------------------- /docs/_build/html/_static/icon_with_text.svg: -------------------------------------------------------------------------------- [SVG: "tweetopic" logo with wordmark; markup stripped in extraction]
-------------------------------------------------------------------------------- /docs/_static/icon.svg: -------------------------------------------------------------------------------- [SVG: tweetopic icon; markup stripped in extraction]
-------------------------------------------------------------------------------- /docs/_build/html/_static/icon.svg: -------------------------------------------------------------------------------- [SVG: tweetopic icon; markup stripped in extraction]
-------------------------------------------------------------------------------- /docs/_static/icon_with_text_bellow.svg: -------------------------------------------------------------------------------- [SVG: "tweetopic" logo with wordmark below; markup stripped in extraction]
-------------------------------------------------------------------------------- /docs/_build/html/_static/icon_with_text_bellow.svg: -------------------------------------------------------------------------------- [SVG: "tweetopic" logo with wordmark below; markup stripped in extraction]
-------------------------------------------------------------------------------- /docs/_static/icon_with_text_below_dark.svg: -------------------------------------------------------------------------------- [SVG: dark-mode "tweetopic" logo with wordmark below; markup stripped in extraction]
-------------------------------------------------------------------------------- /docs/_build/html/_static/icon_with_text_below_dark.svg: -------------------------------------------------------------------------------- [SVG: dark-mode "tweetopic" logo with wordmark below; markup stripped in extraction]
-------------------------------------------------------------------------------- /docs/_build/html/_static/skeleton.css: -------------------------------------------------------------------------------- 
1 | /* Some sane resets. */
2 | html {
3 |   height: 100%;
4 | }
5 | 
6 | body {
7 |   margin: 0;
8 |   min-height: 100%;
9 | }
10 | 
11 | /* All the flexbox magic! 
*/ 12 | body, 13 | .sb-announcement, 14 | .sb-content, 15 | .sb-main, 16 | .sb-container, 17 | .sb-container__inner, 18 | .sb-article-container, 19 | .sb-footer-content, 20 | .sb-header, 21 | .sb-header-secondary, 22 | .sb-footer { 23 | display: flex; 24 | } 25 | 26 | /* These order things vertically */ 27 | body, 28 | .sb-main, 29 | .sb-article-container { 30 | flex-direction: column; 31 | } 32 | 33 | /* Put elements in the center */ 34 | .sb-header, 35 | .sb-header-secondary, 36 | .sb-container, 37 | .sb-content, 38 | .sb-footer, 39 | .sb-footer-content { 40 | justify-content: center; 41 | } 42 | /* Put elements at the ends */ 43 | .sb-article-container { 44 | justify-content: space-between; 45 | } 46 | 47 | /* These elements grow. */ 48 | .sb-main, 49 | .sb-content, 50 | .sb-container, 51 | article { 52 | flex-grow: 1; 53 | } 54 | 55 | /* Because padding making this wider is not fun */ 56 | article { 57 | box-sizing: border-box; 58 | } 59 | 60 | /* The announcements element should never be wider than the page. */ 61 | .sb-announcement { 62 | max-width: 100%; 63 | } 64 | 65 | .sb-sidebar-primary, 66 | .sb-sidebar-secondary { 67 | flex-shrink: 0; 68 | width: 17rem; 69 | } 70 | 71 | .sb-announcement__inner { 72 | justify-content: center; 73 | 74 | box-sizing: border-box; 75 | height: 3rem; 76 | 77 | overflow-x: auto; 78 | white-space: nowrap; 79 | } 80 | 81 | /* Sidebars, with checkbox-based toggle */ 82 | .sb-sidebar-primary, 83 | .sb-sidebar-secondary { 84 | position: fixed; 85 | height: 100%; 86 | top: 0; 87 | } 88 | 89 | .sb-sidebar-primary { 90 | left: -17rem; 91 | transition: left 250ms ease-in-out; 92 | } 93 | .sb-sidebar-secondary { 94 | right: -17rem; 95 | transition: right 250ms ease-in-out; 96 | } 97 | 98 | .sb-sidebar-toggle { 99 | display: none; 100 | } 101 | .sb-sidebar-overlay { 102 | position: fixed; 103 | top: 0; 104 | width: 0; 105 | height: 0; 106 | 107 | transition: width 0ms ease 250ms, height 0ms ease 250ms, opacity 250ms ease; 108 | 109 | opacity: 0; 110 | background-color: rgba(0, 0, 0, 0.54); 111 | } 112 | 113 | #sb-sidebar-toggle--primary:checked 114 | ~ .sb-sidebar-overlay[for="sb-sidebar-toggle--primary"], 115 | #sb-sidebar-toggle--secondary:checked 116 | ~ .sb-sidebar-overlay[for="sb-sidebar-toggle--secondary"] { 117 | width: 100%; 118 | height: 100%; 119 | opacity: 1; 120 | transition: width 0ms ease, height 0ms ease, opacity 250ms ease; 121 | } 122 | 123 | #sb-sidebar-toggle--primary:checked ~ .sb-container .sb-sidebar-primary { 124 | left: 0; 125 | } 126 | #sb-sidebar-toggle--secondary:checked ~ .sb-container .sb-sidebar-secondary { 127 | right: 0; 128 | } 129 | 130 | /* Full-width mode */ 131 | .drop-secondary-sidebar-for-full-width-content 132 | .hide-when-secondary-sidebar-shown { 133 | display: none !important; 134 | } 135 | .drop-secondary-sidebar-for-full-width-content .sb-sidebar-secondary { 136 | display: none !important; 137 | } 138 | 139 | /* Mobile views */ 140 | .sb-page-width { 141 | width: 100%; 142 | } 143 | 144 | .sb-article-container, 145 | .sb-footer-content__inner, 146 | .drop-secondary-sidebar-for-full-width-content .sb-article, 147 | .drop-secondary-sidebar-for-full-width-content .match-content-width { 148 | width: 100vw; 149 | } 150 | 151 | .sb-article, 152 | .match-content-width { 153 | padding: 0 1rem; 154 | box-sizing: border-box; 155 | } 156 | 157 | @media (min-width: 32rem) { 158 | .sb-article, 159 | .match-content-width { 160 | padding: 0 2rem; 161 | } 162 | } 163 | 164 | /* Tablet views */ 165 | @media (min-width: 42rem) { 166 | 
.sb-article-container { 167 | width: auto; 168 | } 169 | .sb-footer-content__inner, 170 | .drop-secondary-sidebar-for-full-width-content .sb-article, 171 | .drop-secondary-sidebar-for-full-width-content .match-content-width { 172 | width: 42rem; 173 | } 174 | .sb-article, 175 | .match-content-width { 176 | width: 42rem; 177 | } 178 | } 179 | @media (min-width: 46rem) { 180 | .sb-footer-content__inner, 181 | .drop-secondary-sidebar-for-full-width-content .sb-article, 182 | .drop-secondary-sidebar-for-full-width-content .match-content-width { 183 | width: 46rem; 184 | } 185 | .sb-article, 186 | .match-content-width { 187 | width: 46rem; 188 | } 189 | } 190 | @media (min-width: 50rem) { 191 | .sb-footer-content__inner, 192 | .drop-secondary-sidebar-for-full-width-content .sb-article, 193 | .drop-secondary-sidebar-for-full-width-content .match-content-width { 194 | width: 50rem; 195 | } 196 | .sb-article, 197 | .match-content-width { 198 | width: 50rem; 199 | } 200 | } 201 | 202 | /* Tablet views */ 203 | @media (min-width: 59rem) { 204 | .sb-sidebar-secondary { 205 | position: static; 206 | } 207 | .hide-when-secondary-sidebar-shown { 208 | display: none !important; 209 | } 210 | .sb-footer-content__inner, 211 | .drop-secondary-sidebar-for-full-width-content .sb-article, 212 | .drop-secondary-sidebar-for-full-width-content .match-content-width { 213 | width: 59rem; 214 | } 215 | .sb-article, 216 | .match-content-width { 217 | width: 42rem; 218 | } 219 | } 220 | @media (min-width: 63rem) { 221 | .sb-footer-content__inner, 222 | .drop-secondary-sidebar-for-full-width-content .sb-article, 223 | .drop-secondary-sidebar-for-full-width-content .match-content-width { 224 | width: 63rem; 225 | } 226 | .sb-article, 227 | .match-content-width { 228 | width: 46rem; 229 | } 230 | } 231 | @media (min-width: 67rem) { 232 | .sb-footer-content__inner, 233 | .drop-secondary-sidebar-for-full-width-content .sb-article, 234 | .drop-secondary-sidebar-for-full-width-content .match-content-width { 235 | width: 67rem; 236 | } 237 | .sb-article, 238 | .match-content-width { 239 | width: 50rem; 240 | } 241 | } 242 | 243 | /* Desktop views */ 244 | @media (min-width: 76rem) { 245 | .sb-sidebar-primary { 246 | position: static; 247 | } 248 | .hide-when-primary-sidebar-shown { 249 | display: none !important; 250 | } 251 | .sb-footer-content__inner, 252 | .drop-secondary-sidebar-for-full-width-content .sb-article, 253 | .drop-secondary-sidebar-for-full-width-content .match-content-width { 254 | width: 59rem; 255 | } 256 | .sb-article, 257 | .match-content-width { 258 | width: 42rem; 259 | } 260 | } 261 | 262 | /* Full desktop views */ 263 | @media (min-width: 80rem) { 264 | .sb-article, 265 | .match-content-width { 266 | width: 46rem; 267 | } 268 | .sb-footer-content__inner, 269 | .drop-secondary-sidebar-for-full-width-content .sb-article, 270 | .drop-secondary-sidebar-for-full-width-content .match-content-width { 271 | width: 63rem; 272 | } 273 | } 274 | 275 | @media (min-width: 84rem) { 276 | .sb-article, 277 | .match-content-width { 278 | width: 50rem; 279 | } 280 | .sb-footer-content__inner, 281 | .drop-secondary-sidebar-for-full-width-content .sb-article, 282 | .drop-secondary-sidebar-for-full-width-content .match-content-width { 283 | width: 67rem; 284 | } 285 | } 286 | 287 | @media (min-width: 88rem) { 288 | .sb-footer-content__inner, 289 | .drop-secondary-sidebar-for-full-width-content .sb-article, 290 | .drop-secondary-sidebar-for-full-width-content .match-content-width { 291 | width: 67rem; 292 | } 293 | 
.sb-page-width { 294 | width: 88rem; 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /docs/_build/html/_static/styles/furo-extensions.css.map: -------------------------------------------------------------------------------- 1 | {"version":3,"file":"styles/furo-extensions.css","mappings":"AAGA,2BACE,oFACA,4CAKE,6CAHA,YACA,eAEA,CACA,kDACE,yCAEF,8CACE,sCAEJ,8CACE,kDAEJ,2BAGE,uBACA,cAHA,gBACA,UAEA,CAGA,yCACE,mBAEF,gDAEE,gDADA,YACA,CACA,sDACE,gDACF,yDACE,sCAEJ,+CACE,UACA,qDACE,UAGF,mDACE,eAEJ,yEAEE,4DAEA,mHASE,mBAPA,kBAEA,YADA,oBAGA,aADA,gBAIA,CAEA,qIAEE,WADA,UACA,CAEJ,uGACE,aAEF,iUAGE,cAEF,mHACE,aC1EJ,gCACE,mCAEF,0BAKE,mBAUA,8CACA,YAFA,mCAKA,eAZA,cALA,UASA,YADA,YAYA,iCAdA,YAcA,CAEA,gCAEE,8CADA,gCACA,CAEF,gCAGE,6BADA,mCADA,YAEA,CAEF,kCAEE,cADA,oBACA,CACA,wCACE,cAEJ,8BACE,UC5CN,KAEE,6CAA8C,CAC9C,uDAAwD,CACxD,uDAAwD,CAGxD,iCAAsC,CAGtC,+CAAgD,CAChD,uDAAwD,CACxD,uDAAwD,CACxD,oDAAqD,CACrD,6DAA8D,CAC9D,6DAA8D,CAG9D,uDAAwD,CACxD,yDAA0D,CAC1D,4DAA6D,CAC7D,2DAA4D,CAC5D,8DAA+D,CAC/D,iEAAkE,CAClE,uDAAwD,CACxD,wDAAyD,CAG3D,gBACE,qFAGF,SACE,6EAEF,cACE,uFAEF,cACE,uFAEF,cACE,uFAGF,qBACE,eAEF,mBACE,WACA,eChDF,KACE,gDAAiD,CACjD,uDAAwD,CACxD,qDAAsD,CACtD,4DAA6D,CAC7D,oCAAqC,CACrC,2CAA4C,CAC5C,4CAA6C,CAC7C,mDAAoD,CACpD,wBAAyB,CACzB,oBAAqB,CACrB,6CAA8C,CAC9C,gCAAiC,CACjC,yDAA0D,CAC1D,uDAAwD,CACxD,8DAA+D,CCbjE,uBACE,eACA,eACA,gBAGF,iBACE,YACA,+EAGF,iBACE,mDACA","sources":["webpack:///./src/furo/assets/styles/extensions/_readthedocs.sass","webpack:///./src/furo/assets/styles/extensions/_copybutton.sass","webpack:///./src/furo/assets/styles/extensions/_sphinx-design.sass","webpack:///./src/furo/assets/styles/extensions/_sphinx-inline-tabs.sass","webpack:///./src/furo/assets/styles/extensions/_sphinx-panels.sass"],"sourcesContent":["// This file contains the styles used for tweaking how ReadTheDoc's embedded\n// contents would show up inside the theme.\n\n#furo-sidebar-ad-placement\n padding: var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)\n .ethical-sidebar\n // Remove the border and box-shadow.\n border: none\n box-shadow: none\n // Manage the background colors.\n background: var(--color-background-secondary)\n &:hover\n background: var(--color-background-hover)\n // Ensure the text is legible.\n a\n color: var(--color-foreground-primary)\n\n .ethical-callout a\n color: var(--color-foreground-secondary) !important\n\n#furo-readthedocs-versions\n position: static\n width: 100%\n background: transparent\n display: block\n\n // Make the background color fit with the theme's aesthetic.\n .rst-versions\n background: rgb(26, 28, 30)\n\n .rst-current-version\n cursor: unset\n background: var(--color-sidebar-item-background)\n &:hover\n background: var(--color-sidebar-item-background)\n .fa-book\n color: var(--color-foreground-primary)\n\n > .rst-other-versions\n padding: 0\n small\n opacity: 1\n\n .injected\n .rst-versions\n position: unset\n\n &:hover,\n &:focus-within\n box-shadow: 0 0 0 1px var(--color-sidebar-background-border)\n\n .rst-current-version\n // Undo the tweaks done in RTD's CSS\n font-size: inherit\n line-height: inherit\n height: auto\n text-align: right\n padding: 12px\n\n // Match the rest of the body\n background: #1a1c1e\n\n .fa-book\n float: left\n color: white\n\n .fa-caret-down\n display: none\n\n .rst-current-version,\n .rst-other-versions,\n .injected\n display: block\n\n > .rst-current-version\n display: none\n",".highlight\n &:hover button.copybtn\n color: var(--color-code-foreground)\n\n button.copybtn\n // Make 
it visible\n opacity: 1\n\n // Align things correctly\n align-items: center\n\n height: 1.25em\n width: 1.25em\n\n top: 0.625rem // $code-spacing-vertical\n right: 0.5rem\n\n // Make it look better\n color: var(--color-background-item)\n background-color: var(--color-code-background)\n border: none\n\n // Change to cursor to make it obvious that you can click on it\n cursor: pointer\n\n // Transition smoothly, for aesthetics\n transition: color 300ms, opacity 300ms\n\n &:hover\n color: var(--color-brand-content)\n background-color: var(--color-code-background)\n\n &::after\n display: none\n color: var(--color-code-foreground)\n background-color: transparent\n\n &.success\n transition: color 0ms\n color: #22863a\n &::after\n display: block\n\n svg\n padding: 0\n","body\n // Colors\n --sd-color-primary: var(--color-brand-primary)\n --sd-color-primary-highlight: var(--color-brand-content)\n --sd-color-primary-text: var(--color-background-primary)\n\n // Shadows\n --sd-color-shadow: rgba(0, 0, 0, 0.05)\n\n // Cards\n --sd-color-card-border: var(--color-card-border)\n --sd-color-card-border-hover: var(--color-brand-content)\n --sd-color-card-background: var(--color-card-background)\n --sd-color-card-text: var(--color-foreground-primary)\n --sd-color-card-header: var(--color-card-marginals-background)\n --sd-color-card-footer: var(--color-card-marginals-background)\n\n // Tabs\n --sd-color-tabs-label-active: var(--color-brand-content)\n --sd-color-tabs-label-hover: var(--color-foreground-muted)\n --sd-color-tabs-label-inactive: var(--color-foreground-muted)\n --sd-color-tabs-underline-active: var(--color-brand-content)\n --sd-color-tabs-underline-hover: var(--color-foreground-border)\n --sd-color-tabs-underline-inactive: var(--color-background-border)\n --sd-color-tabs-overline: var(--color-background-border)\n --sd-color-tabs-underline: var(--color-background-border)\n\n// Tabs\n.sd-tab-content\n box-shadow: 0 -2px var(--sd-color-tabs-overline), 0 1px var(--sd-color-tabs-underline)\n\n// Shadows\n.sd-card // Have a shadow by default\n box-shadow: 0 0.1rem 0.25rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1)\n\n.sd-shadow-sm\n box-shadow: 0 0.1rem 0.25rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1) !important\n\n.sd-shadow-md\n box-shadow: 0 0.3rem 0.75rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1) !important\n\n.sd-shadow-lg\n box-shadow: 0 0.6rem 1.5rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1) !important\n\n// Cards\n.sd-card-hover:hover // Don't change scale on hover\n transform: none\n\n.sd-cards-carousel // Have a bit of gap in the carousel by default\n gap: 0.25rem\n padding: 0.25rem\n","// This file contains styles to tweak sphinx-inline-tabs to work well with Furo.\n\nbody\n --tabs--label-text: var(--color-foreground-muted)\n --tabs--label-text--hover: var(--color-foreground-muted)\n --tabs--label-text--active: var(--color-brand-content)\n --tabs--label-text--active--hover: var(--color-brand-content)\n --tabs--label-background: transparent\n --tabs--label-background--hover: transparent\n --tabs--label-background--active: transparent\n --tabs--label-background--active--hover: transparent\n --tabs--padding-x: 0.25em\n --tabs--margin-x: 1em\n --tabs--border: var(--color-background-border)\n --tabs--label-border: transparent\n --tabs--label-border--hover: var(--color-foreground-muted)\n --tabs--label-border--active: var(--color-brand-content)\n --tabs--label-border--active--hover: var(--color-brand-content)\n","// This file contains 
-------------------------------------------------------------------------------- /tweetopic/btm.py: --------------------------------------------------------------------------------
1 | """Module containing sklearn compatible Biterm Topic Model."""
2 | 
3 | from __future__ import annotations
4 | 
5 | from typing import Optional, Union
6 | 
7 | import numpy as np
8 | import scipy.sparse as spr
9 | import sklearn
10 | from numpy.typing import ArrayLike
11 | 
12 | from tweetopic._btm import (compute_biterm_set, corpus_unique_biterms,
13 |                             fit_model, predict_docs)
14 | from tweetopic._doc import init_doc_words
15 | from tweetopic.exceptions import NotFittedException
16 | from tweetopic.utils import set_numba_seed
17 | 
18 | 
19 | class BTM(sklearn.base.TransformerMixin, sklearn.base.BaseEstimator):
20 |     """Implementation of the Biterm Topic Model with Gibbs Sampling
21 |     solver.
22 | 
23 |     Parameters
24 |     ----------
25 |     n_components: int
26 |         Number of topics in the model.
27 |     n_iterations: int, default 100
28 |         Number of iterations during fitting.
29 |     alpha: float, default 6.0
30 |         Dirichlet prior for topic distribution.
31 |     beta: float, default 0.1
32 |         Dirichlet prior for topic-word distribution.
33 |     random_state: int, default None
34 |         Random seed to use for reproducibility.
35 | 
36 |     Attributes
37 |     ----------
38 |     components_: array of shape (n_components, n_vocab)
39 |         Conditional probabilities of all terms given a topic.
40 |     topic_distribution: array of shape (n_components,)
41 |         Prior probability of each topic.
42 |     n_features_in_: int
43 |         Number of total vocabulary items seen during fitting.
44 |     """
45 | 
46 |     def __init__(
47 |         self,
48 |         n_components: int,
49 |         n_iterations: int = 100,
50 |         alpha: float = 6.0,
51 |         beta: float = 0.1,
52 |         random_state: Optional[int] = None,
53 |     ):
54 |         self.n_components = n_components
55 |         self.n_iterations = n_iterations
56 |         self.alpha = alpha
57 |         self.beta = beta
58 |         self.random_state = random_state
59 |         # Not None for typing reasons.
60 |         self.components_ = np.array(0)
61 |         self.topic_distribution = None
62 |         self.n_features_in_ = 0
63 | 
64 |     @property
65 |     def _fitted(self) -> bool:
66 |         """Property describing whether the model is fitted."""
67 |         return self.topic_distribution is not None
68 | 
69 |     def _check_fitted(self) -> None:
70 |         """Raise exception if the model is not fitted."""
71 |         if not self._fitted:
72 |             raise NotFittedException
73 | 
74 |     def get_params(self, deep: bool = False) -> dict:
75 |         """Get parameters for this estimator.
76 | 
77 |         Parameters
78 |         ----------
79 |         deep: bool, default False
80 |             Ignored, exists for sklearn compatibility.
81 | 
82 |         Returns
83 |         -------
84 |         dict
85 |             Parameter names mapped to their values.
86 | 
87 |         Note
88 |         ----
89 |         Exists for sklearn compatibility.
90 | """ 91 | return { 92 | "n_components": self.n_components, 93 | "n_iterations": self.n_iterations, 94 | "alpha": self.alpha, 95 | "beta": self.beta, 96 | } 97 | 98 | def set_params(self, **params) -> BTM: 99 | """Set parameters for this estimator. 100 | 101 | Returns 102 | ------- 103 | BTM 104 | Estimator instance 105 | 106 | Note 107 | ---- 108 | Exists for sklearn compatibility. 109 | """ 110 | for param, value in params.items(): 111 | self.__setattr__(param, value) 112 | return self 113 | 114 | def fit(self, X: Union[spr.spmatrix, ArrayLike], y: None = None): 115 | """Fits the model using Gibbs Sampling. Detailed description of the 116 | algorithm in Yan et al. (2013). 117 | 118 | Parameters 119 | ---------- 120 | X: array-like or sparse matrix of shape (n_samples, n_features) 121 | BOW matrix of corpus. 122 | y: None 123 | Ignored, exists for sklearn compatibility. 124 | 125 | Returns 126 | ------- 127 | BTM 128 | The fitted model. 129 | 130 | Note 131 | ---- 132 | fit() works in-place too, the fitted model is returned for convenience. 133 | """ 134 | if self.random_state is not None: 135 | set_numba_seed(self.random_state) 136 | # Converting X into sparse array if it isn't one already. 137 | X = spr.csr_matrix(X) 138 | _, self.n_features_in_ = X.shape 139 | # Calculating the number of nonzero elements for each row 140 | # using the internal properties of CSR matrices. 141 | max_unique_words = np.max(np.diff(X.indptr)) 142 | print("Extracting biterms.") 143 | doc_unique_words, doc_unique_word_counts = init_doc_words( 144 | X.tolil(), 145 | max_unique_words=max_unique_words, 146 | ) 147 | biterms = corpus_unique_biterms(doc_unique_words, doc_unique_word_counts) 148 | biterm_set = compute_biterm_set(biterms) 149 | self.topic_distribution, self.components_ = fit_model( 150 | n_iter=self.n_iterations, 151 | alpha=self.alpha, 152 | beta=self.beta, 153 | n_components=self.n_components, 154 | n_vocab=self.n_features_in_, 155 | biterms=biterm_set, 156 | ) 157 | return self 158 | 159 | # TODO: Something goes terribly wrong here, fix this 160 | 161 | def transform(self, X: Union[spr.spmatrix, ArrayLike]) -> np.ndarray: 162 | """Predicts probabilities for each document belonging to each 163 | topic. 164 | 165 | Parameters 166 | ---------- 167 | X: array-like or sparse matrix of shape (n_samples, n_features) 168 | Document-term matrix. 169 | 170 | Returns 171 | ------- 172 | array of shape (n_samples, n_components) 173 | Probabilities for each document belonging to each cluster. 174 | 175 | Raises 176 | ------ 177 | NotFittedException 178 | If the model is not fitted, an exception will be raised 179 | """ 180 | self._check_fitted() 181 | # Converting X into sparse array if it isn't one already. 182 | X = spr.csr_matrix(X) 183 | # n_samples, _ = X.shape 184 | sample_max_unique_words = np.max(np.diff(X.indptr)) 185 | doc_unique_words, doc_unique_word_counts = init_doc_words( 186 | X.tolil(), 187 | max_unique_words=sample_max_unique_words, 188 | ) 189 | return predict_docs( 190 | topic_distribution=self.topic_distribution, # type: ignore 191 | topic_word_distribution=self.components_, 192 | doc_unique_words=doc_unique_words, 193 | doc_unique_word_counts=doc_unique_word_counts, 194 | ) 195 | 196 | def fit_transform( 197 | self, 198 | X: Union[spr.spmatrix, ArrayLike], 199 | y: None = None, 200 | ) -> np.ndarray: 201 | """Fits the model, then transforms the given data. 
203 | 
204 |         Parameters
205 |         ----------
206 |         X: array-like or sparse matrix of shape (n_samples, n_features)
207 |             Document-term matrix.
208 |         y: None
209 |             Ignored, exists for sklearn compatibility.
210 | 
211 |         Returns
212 |         -------
213 |         array of shape (n_samples, n_components)
214 |             Probabilities for each document belonging to each cluster.
215 |         """
216 |         return self.fit(X).transform(X)
217 | 
-------------------------------------------------------------------------------- /docs/_build/html/searchindex.js: -------------------------------------------------------------------------------- (generated Sphinx search index; minified build output omitted)
"x": [2, 3, 4], "spmatrix": [2, 3], "arraylik": [2, 3], "y": [2, 3, 4], "none": [2, 3], "us": [2, 3, 4, 5, 6, 7, 8], "detail": [2, 3], "descript": [2, 3], "algorithm": [2, 3, 5], "yin": [3, 5], "wang": [3, 5], "2014": [3, 5], "like": [2, 3], "spars": [2, 3], "matrix": [2, 3, 4, 5], "n_sampl": [2, 3], "n_featur": [2, 3], "bow": [2, 3], "corpu": [2, 3, 8], "work": [2, 3, 4, 5, 8], "place": [2, 3], "too": [2, 3], "transform": [2, 3, 4, 5], "ndarrai": [2, 3], "predict": [2, 3, 4, 5], "probabl": [2, 3], "belong": [2, 3], "rais": [2, 3], "notfittedexcept": [2, 3], "except": [2, 3], "predict_proba": 3, "alia": 3, "mainli": 3, "densiti": 3, "label": [3, 4, 5], "fit_transform": [2, 3], "given": [2, 3], "data": [2, 3, 5, 7], "provid": [4, 5], "util": [4, 5], "gener": [4, 5], "probabilist": [4, 5], "instead": 4, "process": [4, 5, 7], "focu": 4, "coocurr": 4, "from": [4, 5, 6, 7, 8], "allow": 4, "them": 4, "captur": 4, "relat": 4, "better": [4, 5], "unlik": 4, "thei": 4, "also": [4, 5], "corpora": 4, "longer": 4, "graphic": [4, 5], "plate": [4, 5], "notat": [4, 5], "yan": [2, 4], "et": [2, 4], "al": [2, 4], "2013": [2, 4], "sinc": [4, 5, 6], "\u1e3fcmc": [4, 5], "method": [4, 5], "usual": [4, 5], "converg": [4, 5], "api": [4, 5], "creat": [4, 5, 7], "import": [4, 5, 6, 7, 8], "15": [4, 5, 7], "200": [4, 5, 7], "6": [2, 4], "2": [4, 5, 7], "doc_term_matrix": [4, 5], "unseen": [4, 5], "new_doc": [4, 5], "guo": 4, "j": [4, 5], "lan": 4, "cheng": 4, "A": [4, 5], "proceed": [4, 5], "22nd": 4, "intern": [4, 5], "confer": [4, 5], "world": 4, "wide": 4, "web": 4, "1445": 4, "1456": 4, "\u03c0\u03b1\u03c1\u03bf\u03c5\u03c3\u03b9\u03ac\u03c3\u03c4\u03b7\u03ba\u03b5": 4, "\u03c3\u03c4\u03bf": 4, "rio": 4, "de": 4, "janeiro": 4, "brazil": 4, "doi": 4, "10": 4, "1145": 4, "2488388": 4, "2488514": 4, "simul": 5, "1000": 5, "observ": 5, "drawn": 5, "sourc": 5, "wikipedia": 5, "assum": 5, "point": 5, "popul": 5, "unknown": 5, "thought": 5, "fuzzi": 5, "emploi": 5, "It": [5, 7], "demonstr": 5, "particularli": 5, "over": 5, "explain": 5, "movi": 5, "group": 5, "analogi": 5, "procedur": 5, "student": 5, "classroom": 5, "have": 5, "divid": 5, "themselv": 5, "up": 5, "base": 5, "prefer": 5, "express": 5, "form": 5, "list": 5, "favorit": 5, "analog": 5, "being": 5, "In": 5, "try": 5, "choos": 5, "tabl": 5, "two": [5, 7], "rule": 5, "more": [5, 6, 8], "similar": 5, "own": 5, "here": 5, "repres": 5, "lower": 5, "while": 5, "differ": 5, "approach": 5, "20th": 5, "acm": 5, "sigkdd": 5, "knowledg": 5, "discoveri": 5, "mine": 5, "pp": 5, "233": 5, "242": 5, "associ": 5, "comput": 5, "machineri": 5, "For": [6, 8], "we": [6, 7, 8], "suggest": 6, "joblib": 6, "store": 6, "numpi": 6, "much": 6, "effici": 6, "dump": 6, "load": 6, "dmm_model": 6, "mai": 6, "follow": 6, "comprehens": 6, "overview": 6, "limit": 6, "consult": [6, 8], "": [6, 7], "To": 7, "avoid": 7, "leakag": 7, "make": 7, "easier": 7, "oper": 7, "recommend": [7, 8], "scikit": 7, "learn": 7, "vector": [7, 8], "feature_extract": 7, "countvector": 7, "stop_word": 7, "english": 7, "max_df": 7, "3": 7, "min_df": 7, "add": 7, "stream": 7, "highli": 7, "advis": 7, "pre": 7, "nlp": 7, "spaci": 7, "nltk": 7, "remov": 7, "stop": 7, "function": 7, "lemmat": 7, "could": 7, "drastic": 7, "improv": 7, "qualiti": 7, "topicwizard": 8, "which": 8, "nativ": 8, "wizard": 8, "either": 8, "individu": 8, "pass": 8, "whole": 8, "topic_model": 8, "inform": 8, "100": 2, "fure": 2, "prior": 2, "condit": 2, "topic_distribut": 2}, "objects": {"tweetopic.btm": [[2, 0, 1, "", "BTM"]], 
"tweetopic.btm.BTM": [[2, 1, 1, "", "components_"], [2, 2, 1, "", "fit"], [2, 2, 1, "", "fit_transform"], [2, 2, 1, "", "get_params"], [2, 1, 1, "", "n_features_in_"], [2, 2, 1, "", "set_params"], [2, 1, 1, "", "topic_distribution"], [2, 2, 1, "", "transform"]], "tweetopic.dmm": [[3, 0, 1, "", "DMM"]], "tweetopic.dmm.DMM": [[3, 1, 1, "", "cluster_doc_count"], [3, 1, 1, "", "components_"], [3, 2, 1, "", "fit"], [3, 2, 1, "", "fit_transform"], [3, 2, 1, "", "get_params"], [3, 1, 1, "", "max_unique_words"], [3, 1, 1, "", "n_documents"], [3, 1, 1, "", "n_features_in_"], [3, 2, 1, "", "predict"], [3, 2, 1, "", "predict_proba"], [3, 2, 1, "", "set_params"], [3, 2, 1, "", "transform"]]}, "objtypes": {"0": "py:class", "1": "py:attribute", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "attribute", "Python attribute"], "2": ["py", "method", "Python method"]}, "titleterms": {"tweetop": 0, "get": 0, "start": 0, "usag": [0, 4, 5], "api": 0, "refer": [0, 4, 5], "instal": 1, "btm": 2, "dmm": 3, "type": [], "biterm": 4, "topic": 4, "model": [4, 5, 6], "dirichlet": 5, "multinomi": 5, "mixtur": 5, "persist": 6, "pipelin": 7, "visual": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"Installation": [[1, "installation"]], "DMM": [[3, "dmm"]], "Biterm Topic Model": [[4, "biterm-topic-model"]], "Usage": [[4, "usage"], [5, "usage"], [0, null]], "References": [[4, "references"], [5, "references"]], "Dirichlet Multinomial Mixture Model": [[5, "dirichlet-multinomial-mixture-model"]], "Model persistence": [[6, "model-persistence"]], "Pipelines": [[7, "pipelines"]], "Visualization": [[8, "visualization"]], "BTM": [[2, "btm"]], "tweetopic": [[0, "tweetopic"]], "Getting started": [[0, null]], "API reference": [[0, null]]}, "indexentries": {"btm (class in tweetopic.btm)": [[2, "tweetopic.btm.BTM"]], "components_ (tweetopic.btm.btm attribute)": [[2, "tweetopic.btm.BTM.components_"]], "fit() (tweetopic.btm.btm method)": [[2, "tweetopic.btm.BTM.fit"]], "fit_transform() (tweetopic.btm.btm method)": [[2, "tweetopic.btm.BTM.fit_transform"]], "get_params() (tweetopic.btm.btm method)": [[2, "tweetopic.btm.BTM.get_params"]], "n_features_in_ (tweetopic.btm.btm attribute)": [[2, "tweetopic.btm.BTM.n_features_in_"]], "set_params() (tweetopic.btm.btm method)": [[2, "tweetopic.btm.BTM.set_params"]], "topic_distribution (tweetopic.btm.btm attribute)": [[2, "tweetopic.btm.BTM.topic_distribution"]], "transform() (tweetopic.btm.btm method)": [[2, "tweetopic.btm.BTM.transform"]], "dmm (class in tweetopic.dmm)": [[3, "tweetopic.dmm.DMM"]], "cluster_doc_count (tweetopic.dmm.dmm attribute)": [[3, "tweetopic.dmm.DMM.cluster_doc_count"]], "components_ (tweetopic.dmm.dmm attribute)": [[3, "tweetopic.dmm.DMM.components_"]], "fit() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.fit"]], "fit_transform() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.fit_transform"]], "get_params() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.get_params"]], "max_unique_words (tweetopic.dmm.dmm attribute)": [[3, "tweetopic.dmm.DMM.max_unique_words"]], "n_documents (tweetopic.dmm.dmm attribute)": [[3, "tweetopic.dmm.DMM.n_documents"]], "n_features_in_ (tweetopic.dmm.dmm attribute)": [[3, "tweetopic.dmm.DMM.n_features_in_"]], "predict() 
(tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.predict"]], "predict_proba() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.predict_proba"]], "set_params() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.set_params"]], "transform() (tweetopic.dmm.dmm method)": [[3, "tweetopic.dmm.DMM.transform"]]}}) -------------------------------------------------------------------------------- /tweetopic/dmm.py: -------------------------------------------------------------------------------- 1 | """Module containing a fully sklearn compatible Dirichlet Mixture Model.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Optional, Union 6 | 7 | import numpy as np 8 | import scipy.sparse as spr 9 | import sklearn 10 | from numpy.typing import ArrayLike 11 | 12 | from tweetopic._dmm import fit_model, init_clusters, predict_doc 13 | from tweetopic._doc import init_doc_words 14 | from tweetopic.exceptions import NotFittedException 15 | from tweetopic.utils import set_numba_seed 16 | 17 | 18 | class DMM(sklearn.base.TransformerMixin, sklearn.base.BaseEstimator): 19 | """Implementation of the Dirichlet Mixture Model with Gibbs Sampling 20 | solver. The class aims to achieve full compatibility with sklearn. 21 | 22 | Parameters 23 | ---------- 24 | n_components: int 25 | Number of mixture components in the model. 26 | n_iterations: int, default 50 27 | Number of iterations during fitting. 28 | If you find your results are unsatisfactory, increase this number. 29 | alpha: float, default 0.1 30 | Willingness of a document joining an empty cluster. 31 | beta: float, default 0.1 32 | Willingness to join clusters, where the terms in the document 33 | are not present. 34 | random_state: int, default None 35 | Random seed to use for reproducibility. 36 | 37 | Attributes 38 | ---------- 39 | components_: array of shape (n_components, n_vocab) 40 | Describes all components of the topic distribution. 41 | Contains the amount each word has been assigned to each component 42 | during fitting. 43 | cluster_doc_count: array of shape (n_components,) 44 | Array containing how many documents there are in each cluster. 45 | n_features_in_: int 46 | Number of total vocabulary items seen during fitting. 47 | n_documents: int 48 | Total number of documents seen during fitting. 49 | max_unique_words: int 50 | Maximum number of unique words in a document seen during fitting. 51 | """ 52 | 53 | def __init__( 54 | self, 55 | n_components: int, 56 | n_iterations: int = 50, 57 | alpha: float = 0.1, 58 | beta: float = 0.1, 59 | random_state: Optional[int] = None, 60 | ): 61 | self.n_components = n_components 62 | self.n_iterations = n_iterations 63 | self.alpha = alpha 64 | self.beta = beta 65 | self.random_state = random_state 66 | # Not none for typing reasons 67 | self.components_ = np.array(0) 68 | self.cluster_doc_count = None 69 | self.cluster_word_count = None 70 | self.n_features_in_ = 0 71 | self.n_documents = 0 72 | self.max_unique_words = 0 73 | 74 | @property 75 | def _fitted(self) -> bool: 76 | """Property describing whether the model is fitted.""" 77 | # If the number of documents seen is more than 0 78 | # It can be assumed that the model is fitted. 79 | return bool(self.n_documents) 80 | 81 | def _check_fitted(self) -> None: 82 | """Raise exception if the model is not fitted.""" 83 | if not self._fitted: 84 | raise NotFittedException 85 | 86 | def get_params(self, deep: bool = False) -> dict: 87 | """Get parameters for this estimator. 
88 | 
89 |         Parameters
90 |         ----------
91 |         deep: bool, default False
92 |             Ignored, exists for sklearn compatibility.
93 | 
94 |         Returns
95 |         -------
96 |         dict
97 |             Parameter names mapped to their values.
98 | 
99 |         Note
100 |         ----
101 |         Exists for sklearn compatibility.
102 |         """
103 |         return {
104 |             "n_components": self.n_components,
105 |             "n_iterations": self.n_iterations,
106 |             "alpha": self.alpha,
107 |             "beta": self.beta,
108 |             "random_state": self.random_state,
109 |         }
110 | 
111 |     def set_params(self, **params) -> DMM:
112 |         """Set parameters for this estimator.
113 | 
114 |         Returns
115 |         -------
116 |         DMM
117 |             Estimator instance.
118 | 
119 |         Note
120 |         ----
121 |         Exists for sklearn compatibility.
122 |         """
123 |         for param, value in params.items():
124 |             self.__setattr__(param, value)
125 |         return self
126 | 
127 |     def fit(self, X: Union[spr.spmatrix, ArrayLike], y: None = None):
128 |         """Fits the model using Gibbs Sampling. Detailed description of the
129 |         algorithm in Yin and Wang (2014).
130 | 
131 |         Parameters
132 |         ----------
133 |         X: array-like or sparse matrix of shape (n_samples, n_features)
134 |             BOW matrix of corpus.
135 |         y: None
136 |             Ignored, exists for sklearn compatibility.
137 | 
138 |         Returns
139 |         -------
140 |         DMM
141 |             The fitted model.
142 | 
143 |         Note
144 |         ----
145 |         fit() works in-place too, the fitted model is returned for convenience.
146 |         """
147 |         if self.random_state is not None:
148 |             set_numba_seed(self.random_state)
149 |         # Converting X into a sparse array if it isn't one already.
150 |         X = spr.csr_matrix(X)
151 |         self.n_documents, self.n_features_in_ = X.shape
152 |         print("Initializing components.")
153 |         # Calculating the number of nonzero elements for each row
154 |         # using the internal properties of CSR matrices.
155 |         self.max_unique_words = np.max(np.diff(X.indptr))
156 |         doc_unique_words, doc_unique_word_counts = init_doc_words(
157 |             X.tolil(),
158 |             max_unique_words=self.max_unique_words,
159 |         )
160 |         initial_clusters = np.random.multinomial(
161 |             1,
162 |             np.ones(self.n_components) / self.n_components,
163 |             size=self.n_documents,
164 |         )
165 |         doc_clusters = np.argmax(initial_clusters, axis=1)
166 |         self.cluster_doc_count = np.zeros(self.n_components)
167 |         self.components_ = np.zeros((self.n_components, self.n_features_in_))
168 |         self.cluster_word_count = np.zeros(self.n_components)
169 |         init_clusters(
170 |             cluster_word_distribution=self.components_,
171 |             cluster_word_count=self.cluster_word_count,
172 |             cluster_doc_count=self.cluster_doc_count,
173 |             doc_clusters=doc_clusters,
174 |             doc_unique_words=doc_unique_words,
175 |             doc_unique_word_counts=doc_unique_word_counts,
176 |             max_unique_words=self.max_unique_words,
177 |         )
178 |         fit_model(
179 |             n_iter=self.n_iterations,
180 |             alpha=self.alpha,
181 |             beta=self.beta,
182 |             n_clusters=self.n_components,
183 |             n_vocab=self.n_features_in_,
184 |             n_docs=self.n_documents,
185 |             doc_unique_words=doc_unique_words,
186 |             doc_unique_word_counts=doc_unique_word_counts,
187 |             doc_clusters=doc_clusters,
188 |             cluster_doc_count=self.cluster_doc_count,
189 |             cluster_word_count=self.cluster_word_count,
190 |             cluster_word_distribution=self.components_,
191 |             max_unique_words=self.max_unique_words,
192 |         )
193 |         return self
194 | 
195 |     def transform(self, X: Union[spr.spmatrix, ArrayLike]) -> np.ndarray:
196 |         """Predicts probabilities for each document belonging to each
197 |         component.
198 | 
199 |         Parameters
200 |         ----------
201 |         X: array-like or sparse matrix of shape (n_samples, n_features)
202 |             Document-term matrix.
203 | 
204 |         Returns
205 |         -------
206 |         array of shape (n_samples, n_components)
207 |             Probabilities for each document belonging to each cluster.
208 | 
209 |         Raises
210 |         ------
211 |         NotFittedException
212 |             If the model is not fitted, an exception will be raised.
213 |         """
214 |         self._check_fitted()
215 |         # Converting X into a sparse array if it isn't one already.
216 |         X = spr.csr_matrix(X)
217 |         sample_max_unique_words = np.max(np.diff(X.indptr))
218 |         doc_unique_words, doc_unique_word_counts = init_doc_words(
219 |             X.tolil(),
220 |             max_unique_words=sample_max_unique_words,
221 |         )
222 |         doc_words_count = np.sum(doc_unique_word_counts, axis=1)
223 |         n_docs = X.shape[0]
224 |         predictions = []
225 |         for i_doc in range(n_docs):
226 |             pred = np.zeros(self.n_components)
227 |             predict_doc(
228 |                 probabilities=pred,
229 |                 i_document=i_doc,
230 |                 doc_unique_words=doc_unique_words,
231 |                 doc_unique_word_counts=doc_unique_word_counts,
232 |                 n_words=doc_words_count[i_doc],
233 |                 alpha=self.alpha,
234 |                 beta=self.beta,
235 |                 n_clusters=self.n_components,
236 |                 n_vocab=self.n_features_in_,
237 |                 n_docs=n_docs,
238 |                 cluster_doc_count=self.cluster_doc_count,  # type: ignore
239 |                 cluster_word_count=self.cluster_word_count,  # type: ignore
240 |                 cluster_word_distribution=self.components_,  # type: ignore
241 |                 max_unique_words=sample_max_unique_words,
242 |             )
243 |             predictions.append(pred)
244 |         return np.stack(predictions)
245 | 
246 |     def predict_proba(self, X: Union[spr.spmatrix, ArrayLike]) -> np.ndarray:
247 |         """Alias of :meth:`~tweetopic.dmm.DMM.transform`.
248 | 
249 |         Mainly exists for compatibility with density estimators in
250 |         sklearn.
251 |         """
252 |         return self.transform(X)
253 | 
254 |     def predict(self, X: Union[spr.spmatrix, ArrayLike]) -> np.ndarray:
255 |         """Predicts cluster labels for a set of documents. Mainly exists for
256 |         compatibility with density estimators in sklearn.
257 | 
258 |         Parameters
259 |         ----------
260 |         X: array-like or sparse matrix of shape (n_samples, n_features)
261 |             Document-term matrix.
262 | 
263 |         Returns
264 |         -------
265 |         array of shape (n_samples,)
266 |             Cluster label for each document.
267 | 
268 |         Raises
269 |         ------
270 |         NotFittedException
271 |             If the model is not fitted, an exception will be raised.
272 |         """
273 |         return np.argmax(self.transform(X), axis=1)
274 | 
275 |     def fit_transform(
276 |         self,
277 |         X: Union[spr.spmatrix, ArrayLike],
278 |         y: None = None,
279 |     ) -> np.ndarray:
280 |         """Fits the model, then transforms the given data.
281 | 
282 |         Parameters
283 |         ----------
284 |         X: array-like or sparse matrix of shape (n_samples, n_features)
285 |             Document-term matrix.
286 |         y: None
287 |             Ignored, exists for sklearn compatibility.
288 | 
289 |         Returns
290 |         -------
291 |         array of shape (n_samples, n_components)
292 |             Probabilities for each document belonging to each cluster.
293 |         """
294 |         return self.fit(X).transform(X)
295 | 
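Both estimator classes above expose the same sklearn-style surface: fit, transform, and fit_transform, with DMM additionally offering predict and predict_proba. A minimal usage sketch on a toy corpus follows; the documents and hyperparameter values are illustrative only, not tuned recommendations.

# Minimal sketch of the shared estimator API of BTM and DMM.
# The corpus and hyperparameter values below are illustrative only.
from sklearn.feature_extraction.text import CountVectorizer

from tweetopic.btm import BTM
from tweetopic.dmm import DMM

texts = [
    "interest rates rise as inflation accelerates",
    "the cup final was decided on penalties",
    "central bank signals further tightening",
    "the national team announced a new coach",
]

# Both models consume a bag-of-words document-term matrix.
doc_term_matrix = CountVectorizer().fit_transform(texts)

dmm = DMM(n_components=2, n_iterations=200, alpha=0.1, beta=0.1, random_state=0)
doc_topic = dmm.fit_transform(doc_term_matrix)  # shape: (n_docs, n_components)
labels = dmm.predict(doc_term_matrix)  # hard cluster label per document

btm = BTM(n_components=2, n_iterations=100, alpha=6.0, beta=0.1, random_state=0)
btm.fit(doc_term_matrix)
# Note: btm.transform() carries an unresolved TODO in the source above.
topic_probs = btm.transform(doc_term_matrix)

Since both models consume a plain document-term matrix, swapping one for the other requires no other code changes.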
-------------------------------------------------------------------------------- /docs/_build/html/search.html: -------------------------------------------------------------------------------- (generated Furo search page; static build HTML omitted)
-------------------------------------------------------------------------------- /docs/_build/html/tweetopic.typing.html: -------------------------------------------------------------------------------- (generated API reference page for the "Typing" module; static build HTML omitted)
--------------------------------------------------------------------------------
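For completeness: the built documentation indexed above recommends composing the vectorizer and the topic model into a scikit-learn Pipeline (to avoid data leakage and simplify prediction on raw text) and persisting fitted models with joblib, which stores the underlying numpy arrays efficiently. A sketch of that workflow follows; the corpus, vectorizer settings, and file name are illustrative assumptions, not values taken from the docs.

# Sketch of the pipeline-and-persistence workflow the docs describe.
# Corpus, vectorizer settings, and the file name are illustrative assumptions.
import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline

from tweetopic.dmm import DMM

texts = [
    "breaking news about the economy",
    "sports results from the weekend",
    "markets react to the policy change",
]

pipeline = Pipeline(
    [
        ("vectorizer", CountVectorizer(stop_words="english")),
        ("dmm", DMM(n_components=2, n_iterations=200, random_state=0)),
    ]
)
pipeline.fit(texts)

# joblib serializes the numpy arrays inside the fitted model efficiently.
joblib.dump(pipeline, "dmm_pipeline.joblib")
restored = joblib.load("dmm_pipeline.joblib")
doc_topic = restored.transform(["an unseen document about sports"])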