├── .flake8 ├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── CONTRIBUTING.rst ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── Pipfile ├── Pipfile.lock ├── README.rst ├── docs ├── Makefile ├── authors.rst ├── conf.py ├── contributing.rst ├── history.rst ├── index.rst ├── make.bat ├── modules │ └── API.rst └── readme.rst ├── notebooks ├── Gensim Newsgroup.ipynb ├── GraphLab.ipynb ├── LDA model.ipynb ├── Movie Reviews, AP News, and Jeopardy.ipynb ├── data │ ├── ap_input.json │ ├── jeopardy_input.json │ └── movie_reviews_input.json ├── pyLDAvis └── pyLDAvis_overview.ipynb ├── pyLDAvis ├── __init__.py ├── _display.py ├── _prepare.py ├── _server.py ├── gensim_models.py ├── graphlab.py ├── js │ ├── d3.v5.min.js │ ├── ldavis.css │ ├── ldavis.js │ ├── ldavis.v1.0.0.css │ ├── ldavis.v1.0.0.js │ └── ldavis.v3.0.0.js ├── lda_model.py ├── urls.py └── utils.py ├── pyproject.toml ├── requirements.txt ├── setup.py ├── tests ├── data │ ├── .gitattributes │ ├── export_data.R │ ├── movie_reviews_input.json │ └── movie_reviews_output.json └── pyLDAvis │ ├── test_gensim_models.py │ └── test_prepare.py └── tox.ini /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | exclude = mypy-stubs 4 | ignore = W,E731,F403 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | .eggs 9 | *.egg-info 10 | dist 11 | build 12 | eggs 13 | parts 14 | bin 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | htmlcov 30 | test/data/*.json 31 | 32 | # Translations 33 | *.mo 34 | 35 | # Mr Developer 36 | .mr.developer.cfg 37 | .project 38 | .pydevproject 39 | 40 | # Complexity 41 | output/*.html 42 | output/*/index.html 43 | 44 | # Sphinx 45 | docs/_build 46 | 47 | # IPython 48 | .ipynb_checkpoints 49 | 50 | # JetBrains 51 | .idea 52 | 53 | # Mac OS 54 | .DS_Store 55 | 56 | ### Python.VirtualEnv Stack ### 57 | # Virtualenv 58 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 59 | [Bb]in 60 | [Ii]nclude 61 | [Ll]ib 62 | [Ll]ib64 63 | [Ll]ocal 64 | [Ss]cripts 65 | pyvenv.cfg 66 | pip-selfcheck.json 67 | 68 | # Pip 69 | # Pipfile 70 | # Pipfile.lock 71 | pypi_package.iml 72 | # pyproject.toml 73 | 74 | # Distribution / packaging 75 | .Python 76 | build/ 77 | develop-eggs/ 78 | dist/ 79 | downloads/ 80 | eggs/ 81 | .eggs/ 82 | lib/ 83 | lib64/ 84 | parts/ 85 | sdist/ 86 | var/ 87 | wheels/ 88 | *.egg-info/ 89 | .installed.cfg 90 | *.egg 91 | MANIFEST 92 | 93 | # Notebooks 94 | notebooks/data 95 | notebooks/newsgroups* 96 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Config file for automatic testing at travis-ci.org 2 | 3 | language: python 4 | 5 | python: 6 | - "3.11" 7 | - "3.10" 8 | - "3.9" 9 | 10 | env: 11 | - DEPS="pytest gensim smart_open==2.0.0" 12 | 13 | before_install: 14 | # conda instructions from http://conda.pydata.org/docs/travis.html 15 | - sudo apt-get update 16 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 17 | - bash miniconda.sh -b -p $HOME/miniconda 18 | - source 
"$HOME/miniconda/etc/profile.d/conda.sh" 19 | - hash -r 20 | - conda config --set always_yes yes --set changeps1 no 21 | - conda update -q conda 22 | # Useful for debugging any issues with conda 23 | - conda info -a 24 | - export BOTO_CONFIG=/dev/null 25 | install: 26 | # download JSON data from github since travis does not have git-lfs rolled out yet 27 | - (cd tests/data; curl -L -O https://github.com/bmabey/pyLDAvis/raw/master/tests/data/movie_reviews_input.json && curl -L -O https://github.com/bmabey/pyLDAvis/raw/master/tests/data/movie_reviews_output.json) 28 | - ls -la tests/data/ 29 | - conda create -n testenv --yes python=$TRAVIS_PYTHON_VERSION $DEPS 30 | - conda activate testenv 31 | - pip install . 32 | 33 | # command to run tests, e.g. pytest 34 | script: 35 | - pytest 36 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | * Ben Mabey 9 | 10 | Contributors 11 | ------------ 12 | 13 | * Paul English - JS and CSS fixes and improvements. 14 | * Mark Susol - Python and JSS improvements. 15 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Contributions are welcome, and they are greatly appreciated! Every 6 | little bit helps, and credit will always be given. 7 | 8 | You can contribute in many ways: 9 | 10 | Types of Contributions 11 | ---------------------- 12 | 13 | Report Bugs 14 | ~~~~~~~~~~~ 15 | 16 | Report bugs at https://github.com/bmabey/pyLDAvis/issues. 17 | 18 | If you are reporting a bug, please include: 19 | 20 | * Your operating system name and version. 21 | * Any details about your local setup that might be helpful in troubleshooting. 22 | * Detailed steps to reproduce the bug. 23 | 24 | Fix Bugs 25 | ~~~~~~~~ 26 | 27 | Look through the GitHub issues for bugs. Anything tagged with "bug" 28 | is open to whoever wants to implement it. 29 | 30 | Implement Features 31 | ~~~~~~~~~~~~~~~~~~ 32 | 33 | Look through the GitHub issues for features. Anything tagged with "feature" 34 | is open to whoever wants to implement it. 35 | 36 | Write Documentation 37 | ~~~~~~~~~~~~~~~~~~~ 38 | 39 | pyLDAvis could always use more documentation, whether as part of the 40 | official pyLDAvis docs, in docstrings, or even on the web in blog posts, 41 | articles, and such. 42 | 43 | Submit Feedback 44 | ~~~~~~~~~~~~~~~ 45 | 46 | The best way to send feedback is to file an issue at https://github.com/bmabey/pyLDAvis/issues. 47 | 48 | If you are proposing a feature: 49 | 50 | * Explain in detail how it would work. 51 | * Keep the scope as narrow as possible, to make it easier to implement. 52 | * Remember that this is a volunteer-driven project, and that contributions 53 | are welcome :) 54 | 55 | Get Started! 56 | ------------ 57 | 58 | Ready to contribute? Here's how to set up `pyLDAvis` for local development. 59 | 60 | 1. Fork the `pyLDAvis` repo on GitHub. 61 | 2. Clone your fork locally:: 62 | 63 | $ git clone git@github.com:your_name_here/pyLDAvis.git 64 | 65 | 3. Install your local copy into a virtualenv. 
Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: 66 | 67 | $ mkvirtualenv pyLDAvis 68 | $ cd pyLDAvis/ 69 | $ python setup.py develop 70 | 71 | 4. Create a branch for local development:: 72 | 73 | $ git checkout -b name-of-your-bugfix-or-feature 74 | 75 | Now you can make your changes locally. 76 | 77 | 5. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox:: 78 | 79 | $ python -m flake8 pyLDAvis tests 80 | $ python -m pytest 81 | $ python -m tox 82 | 83 | To get flake8 and tox, just pip install them into your virtualenv. 84 | 85 | 6. Commit your changes and push your branch to GitHub:: 86 | 87 | $ git add . 88 | $ git commit -m "Your detailed description of your changes." 89 | $ git push origin name-of-your-bugfix-or-feature 90 | 91 | 7. Submit a pull request through the GitHub website. 92 | 93 | Pull Request Guidelines 94 | ----------------------- 95 | 96 | Before you submit a pull request, check that it meets these guidelines: 97 | 98 | 1. The pull request should include tests. 99 | 2. If the pull request adds functionality, the docs should be updated. Put 100 | your new functionality into a function with a docstring, and add the 101 | feature to the list in README.rst. 102 | 3. The pull request should work for Python 3.9, 3.10, 3.11, and for PyPI. Check 103 | https://travis-ci.org/bmabey/pyLDAvis/pull_requests 104 | and make sure that the tests pass for all supported Python versions. 105 | 106 | Maintainers 107 | ------------ 108 | 109 | Ready to publish a new version to PyPi? Here's how the workflow to follow. 110 | 111 | 1. Ensure you are in the pyLDAvis directory 112 | 2. Pipenv workflow:: 113 | 114 | $ pipenv install -e . 115 | $ pipenv install --dev 116 | $ pipenv shell 117 | (pyLDAvis) $ flake8 pyLDAvis tests 118 | (pyLDAvis) $ pytest 119 | (pyLDAvis) $ tox 120 | 121 | -- TestPyPi 122 | (pyLDAvis) $ python setup.py sdist bdist_wheel 123 | (pyLDAvis) $ twine check dist/* 124 | (pyLDAvis) $ twine upload --repository testpypi dist/* 125 | 126 | -- Publish 127 | (pyLDAvis) $ twine upload --repository-url https://upload.pypi.org/legacy/ dist/* 128 | (pyLDAvis) $ rm dist/* 129 | 130 | Note: MacOS Big Sur is both 10.16 and 11.0 – it’s official (https://eclecticlight.co/2020/07/21/big-sur-is-both-10-16-and-11-0-its-official/) :: 131 | 132 | $ export SYSTEM_VERSION_COMPAT=1 133 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | .. :changelog: 2 | 3 | History 4 | ------- 5 | 6 | 3.4.1 (2023-04-23) 7 | ~~~~~~~~~~~~~~~~~~ 8 | * Pandas 2.x release, the drop shall use .drop(saliency, axis=1) #247 9 | 10 | 3.4.0 (2023-02-12) 11 | ~~~~~~~~~~~~~~~~~~ 12 | 13 | * Adding testing for Python 3.10, 3.11, move default version to Python 3.10. 14 | * Tox testing: No module named 'sklearn.manifold'; 'sklearn' is not a package. 15 | * Rename sklearn.py to lda_model.py. 16 | * ValueError: The parameter init="pca" cannot be used with metric="precomputed". 17 | * Update sklearn.py #239. 18 | * fixes error of get_feature_names removal #235. 19 | * Remove "sklearn" from requirements #234 20 | * Bump joblib from 1.0.1 to 1.2.0 dependencies #231. 21 | * Fixing for small number of topics #229. 22 | * Bump numpy from 1.20.1 to 1.22.0 dependenciesv #227. 23 | * License correction #224. 24 | * Fix background color in Notebooks with dark themes #222. 
25 | * Start building Wheels alongside sdist #221 26 | 27 | 3.3.1 (2021-03-24) 28 | ~~~~~~~~~~~~~~~~~~ 29 | 30 | * Restored x-axis scale labels for term bars #200. 31 | * import pyLDAvis.gensim_models as gensimvis 32 | * Deleted orphaned files. 33 | * Update .gitignore for notebooks/* models, data. 34 | 35 | 3.3.0 (2021-03-16) 36 | ~~~~~~~~~~~~~~~~~~ 37 | 38 | * Python 3.7, 3.8, 3.9: dropped 2.7, 3.5, 3.6 support. 39 | * RuntimeWarning: divide by zero encountered in log #174. 40 | * Deprecation warning due to invalid escape sequences #166. 41 | * `python setup.py test` is deprecated. 42 | * FutureWarning: pandas.util.testing is deprecated. 43 | 44 | 3.2.2 (2021-02-19) 45 | ~~~~~~~~~~~~~~~~~~ 46 | 47 | * Fix browser caching of cdn.jsdelivr files. 48 | * update ldavis.js to match ldavis.3.0.0.js 49 | 50 | 3.2.1 (2021-02-17) 51 | ~~~~~~~~~~~~~~~~~~ 52 | 53 | * Fix missing labels and other D3.V3 to D3.V5 issues. 54 | * Revert the indexing changes i.e. (startIndex - 1). 55 | * Removed some unused GLOBALs. 56 | 57 | 3.2.0 (2021-02-10) 58 | ~~~~~~~~~~~~~~~~~~ 59 | 60 | * Switches the CDN to cdn.jsdelivr to get accurate mime types. 61 | 62 | 3.1.0 (2021-02-07) 63 | ~~~~~~~~~~~~~~~~~~ 64 | 65 | * Replaces rawgit CDN since it has been sunset. 66 | 67 | 3.0.0 (2021-02-06) 68 | ~~~~~~~~~~~~~~~~~~ 69 | 70 | * Upgrades D3 code to use the d3.v5. 71 | 72 | 2.1.2 (2018-02-06) 73 | ~~~~~~~~~~~~~~~~~~ 74 | 75 | * Fix pandas deprecation warnings. 76 | 77 | 2.1.1 (2017-02-13) 78 | ~~~~~~~~~~~~~~~~~~ 79 | 80 | * Fix `gensim` module to work with a sparse corpus #82. 81 | 82 | 2.1.0 (2016-06-30) 83 | ~~~~~~~~~~~~~~~~~~ 84 | 85 | * Added missing dependency on `scipy`. 86 | * Fixed term sorting that was incompatible with pandas 0.19.x. 87 | 88 | 2.0.0 (2016-06-30) 89 | ~~~~~~~~~~~~~~~~~~ 90 | 91 | * Removed dependency on `scikit-bio` by adding an internal PCoA implementation. 92 | * Added helper functions for scikit-learn LDA model! See the new notebook for details. 93 | * Extended gensim helper functions to work with HDP models. 94 | * Added scikit-learn's Multi-dimensional scaling as another MDS option when scikit-learn is installed. 95 | 96 | 1.5.1 (2016-04-15) 97 | ~~~~~~~~~~~~~~~~~~ 98 | 99 | * Add sort_topics option to prepare function to allow disabling of topic re-ordering. 100 | 101 | 102 | 1.5.0 (2016-02-20) 103 | ~~~~~~~~~~~~~~~~~~ 104 | 105 | * Red Bar Width bug fix 106 | 107 | In some cases, the widths of the red topic-term bars did not decrease (as they should have) from term \#1 to 108 | term \#R under the relevance ranking with $\lambda = 1$. In other words, when $\lambda = 1$, there were topics 109 | in which a narrow red bar was displayed above a wider red bar, which should never happen. The issue had to do 110 | with the way topic-term bar widths are computed, and is discussed in detail in #32. 111 | 112 | 113 | In the end, we implemented a quick fix in which we compute term frequencies implicitly, rather than using those 114 | supplied in the createJSON() function. The upside is that the red bar widths are now explicitly controlled to 115 | produce the correct visualization. The downside is that the blue bar widths do not necessarily match the 116 | user-supplied term frequencies exactly -- in fact, the new version of LDAvis ignores the user-supplied term 117 | frequencies entirely. In a few experiments, the differences are small, and decrease (as a proportion of the true 118 | term frequencies) as the true term frequencies increase. 
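(For context: the relevance ranking referred to above is the one defined in the LDAvis paper by Sievert & Shirley (2014), which the README links to. A sketch of that definition, written in the paper's notation rather than taken from this codebase, is

.. math::

   r(w, t \mid \lambda) = \lambda \log(\phi_{wt}) + (1 - \lambda)\log\frac{\phi_{wt}}{p_w}

where $\phi_{wt}$ is the probability of term $w$ within topic $t$ and $p_w$ is the marginal probability of term $w$ in the corpus. With $\lambda = 1$ the ranking reduces to sorting terms by $\phi_{wt}$, which is why the red topic-term bars should never widen going from term \#1 down to term \#R.)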
119 | 120 | 121 | 122 | 1.4.1 (2016-01-31) 123 | ~~~~~~~~~~~~~~~~~~ 124 | 125 | * Included requirements.txt in MANIFEST to (hopefully) fix bad release. 126 | 127 | 1.4.0 (2016-01-31) 128 | ~~~~~~~~~~~~~~~~~~ 129 | 130 | * Updated to newest version of skibio for PCoA mds. 131 | * requirements.txt cleanup 132 | * New 'tsne' option for prepare, see docs and notebook for more info. 133 | 134 | 135 | 1.3.5 (2015-12-18) 136 | ~~~~~~~~~~~~~~~~~~ 137 | 138 | * Add explicit version info for scikit-bio since the API has changed. 139 | 140 | 141 | 1.3.4 (2015-11-16) 142 | ~~~~~~~~~~~~~~~~~~ 143 | 144 | * Gensim Python typo fix in imports. :/ 145 | 146 | 1.3.3 (2015-11-13) 147 | ~~~~~~~~~~~~~~~~~~ 148 | 149 | * Gensim Python 2.x fix for absolute imports. 150 | 151 | 1.3.2 (2015-11-09) 152 | ~~~~~~~~~~~~~~~~~~ 153 | 154 | * Gensim prepare 25% speed increase, thanks @mattilyra! 155 | * Pandas deprecation warnings are now gone. 156 | * Pandas v0.17 is now being used. 157 | 158 | 1.3.1 (2015-11-02) 159 | ~~~~~~~~~~~~~~~~~~ 160 | 161 | * Updates gensim and other logic to be python 3 compatible. 162 | 163 | 1.3.0 (2015-08-20) 164 | ~~~~~~~~~~~~~~~~~~ 165 | 166 | * Fixes gensim logic and makes it more robust. 167 | * Faster graphlab processing. 168 | * kargs for gensim and graphlab are passed down to underlying prepare function. 169 | * Requires recent version of pandas to avoid problems with our use of the newer `DataFrame.to_dict` API. 170 | 171 | 1.2.0 (2015-06-13) 172 | ~~~~~~~~~~~~~~~~~~ 173 | 174 | * Updates gensim logic to be clearer and work with Python 3.x. 175 | 176 | 1.1.0 (2015-06-02) 177 | ~~~~~~~~~~~~~~~~~~ 178 | 179 | * Fixes bug with GraphLab function that was producing bogus visualizations. 180 | 181 | 1.0.0 (2015-05-29) 182 | ~~~~~~~~~~~~~~~~~~ 183 | 184 | * First release on PyPI. Faithful port of R version with IPython support and helper functions for GraphLab & gensim. 185 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Ben Mabey 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | * Neither the name of pyLDAvis nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.rst 2 | include CONTRIBUTING.rst 3 | include HISTORY.rst 4 | include LICENSE 5 | include README.rst 6 | include requirements.txt 7 | 8 | recursive-include tests *.py 9 | recursive-include notebooks *.ipynb 10 | recursive-exclude notebooks/.ipynb_checkpoints * 11 | recursive-exclude * __pycache__ 12 | recursive-exclude * *.py[co] 13 | include tests/data/movie_reviews_input.json tests/data/movie_reviews_output.json tests/data/export_data.R 14 | 15 | recursive-include docs *.rst conf.py Makefile make.bat 16 | recursive-include pyLDAvis *.py *.js *.css 17 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean-pyc clean-build docs clean 2 | 3 | help: 4 | @echo "clean - remove all build, test, coverage and Python artifacts" 5 | @echo "clean-build - remove build artifacts" 6 | @echo "clean-pyc - remove Python file artifacts" 7 | @echo "clean-test - remove test and coverage artifacts" 8 | @echo "lint - check style with flake8" 9 | @echo "test - run tests quickly with the default Python" 10 | @echo "test-all - run tests on every Python version with tox" 11 | @echo "coverage - check code coverage quickly with the default Python" 12 | @echo "docs - generate Sphinx HTML documentation, including API docs" 13 | @echo "release - package and upload a release" 14 | @echo "dist - package" 15 | @echo "install - install the package to the active Python's site-packages" 16 | 17 | clean: clean-build clean-pyc clean-test 18 | 19 | clean-build: 20 | rm -rf build/ 21 | rm -rf dist/ 22 | rm -rf .eggs/ 23 | find . -name '*.egg-info' -exec rm -rf {} + 24 | find . -name '*.egg' -exec rm -rf {} + 25 | 26 | clean-pyc: 27 | find . -name '*.pyc' -exec rm -f {} + 28 | find . -name '*.pyo' -exec rm -f {} + 29 | find . -name '*~' -exec rm -f {} + 30 | find . 
-name '__pycache__' -exec rm -rf {} + 31 | 32 | clean-test: 33 | rm -rf .tox/ 34 | rm -f .coverage 35 | rm -rf htmlcov/ 36 | 37 | lint: 38 | flake8 pyLDAvis tests 39 | 40 | test: 41 | pytest 42 | 43 | test-all: 44 | tox 45 | 46 | coverage: 47 | coverage run --source pyLDAvis setup.py test 48 | coverage report -m 49 | coverage html 50 | open htmlcov/index.html 51 | 52 | docs: 53 | rm -f docs/pyLDAvis.rst 54 | rm -f docs/modules.rst 55 | sphinx-apidoc -o docs/ pyLDAvis 56 | $(MAKE) -C docs clean 57 | $(MAKE) -C docs html 58 | open docs/_build/html/index.html 59 | 60 | release: clean 61 | python setup.py sdist upload 62 | #python setup.py bdist_wheel upload 63 | 64 | dist: clean 65 | python setup.py sdist 66 | #python setup.py bdist_wheel 67 | ls -l dist 68 | 69 | install: clean 70 | python setup.py install 71 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | pyLDAvis = {editable = true, path = "."} 8 | numpy = ">=1.24.2" 9 | scipy = "*" 10 | pandas = ">=2.0.0" 11 | joblib = ">=1.2.0" 12 | numexpr = "*" 13 | funcy = "*" 14 | scikit-learn = ">=1.0.0" 15 | gensim = "*" 16 | Jinja2 = "*" 17 | reproducer = {editable = true, path = "."} 18 | 19 | [dev-packages] 20 | pytest = ">=3.9" 21 | 22 | [requires] 23 | python_version = "3.11" 24 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "e27e982c64fe58fb5878295a2e6ca15753d1a5dab60d17674314b8a04a450ee4" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.9" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "funcy": { 20 | "hashes": [ 21 | "sha256:1d3fc5d42cf7564a6b2be04042d0df7a50c77903cf760a34786d0c9ebd659b25", 22 | "sha256:2775409b7dc9106283f1224d97e6df5f2c02e7291c8caed72764f5a115dffb50" 23 | ], 24 | "index": "pypi", 25 | "version": "==1.16" 26 | }, 27 | "gensim": { 28 | "hashes": [ 29 | "sha256:1932c257de4eccbb64cc40d46e8577a25f5f47b94b96019a969fb36150f11d15", 30 | "sha256:1ff0171ec5b7473facb1441426a6b41e8ec4599fd62e1820868ab965804e3d4c", 31 | "sha256:36222dbf89aa57909131fc79654e92c918e1075b9ebd00532c3d23b76b6ce8eb", 32 | "sha256:39139be83c3128e234216189a094f959ac2b052a808911b0b56d980d5f96981f", 33 | "sha256:3e34cfe767a8db52812826136d6e94863081fd1456726bd1ff40b4e25965fbb5", 34 | "sha256:58d9ab570b225f3aafec55286864560a25701f7446af9dbc0ad51aa5f61712fa", 35 | "sha256:615d2a57efeaf97cd847e95f83b2fc168f9d22f4922aaa9cda9350f05648560c", 36 | "sha256:66a9574f9f2bbf8fd8e6d7a120443793b96bfd4c153b41f266b6299aa3362de7", 37 | "sha256:7bbc3d6c80c9fd97b89dfee2f44562b75542f72141f5fbacb91334597485f55c", 38 | "sha256:804e18d76d9034bc70f93b8407680b7956c99f03914e85e31dd8b296623dc0ed", 39 | "sha256:8bd89b791e6729a9dd1c345d32fc9e2ba51348cf54fbaa8d49259eb92e719084", 40 | "sha256:8c6a4b271f4d554fdf14b9cb34d4da6cde7084f7f581c5c6dd5fcac648db35be", 41 | "sha256:8d0bf4074ff467a0b22c5e4cecfb7d12afcca6246dac515d5a06ab7e4c775f8e", 42 | "sha256:d4b4ca5d1408e2d89e0ac45cd8a432abf747d5b62eea68e6dccacefa03d759c9", 43 | "sha256:d812dcdf2bfaf527a09ecf867303c117d6f497233db08f1d8209ffb71aaf3fdb", 44 | 
"sha256:ea47999c7da97472fce8f0831a63e4089d85539c8e0cdb895f087aea1eed4a3b", 45 | "sha256:f6133b0f76d0c262231465936cded8920df88edf079df1e7bfe95f049ad8301e" 46 | ], 47 | "index": "pypi", 48 | "version": "==4.1.2" 49 | }, 50 | "jinja2": { 51 | "hashes": [ 52 | "sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8", 53 | "sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7" 54 | ], 55 | "index": "pypi", 56 | "version": "==3.0.3" 57 | }, 58 | "joblib": { 59 | "hashes": [ 60 | "sha256:091138ed78f800342968c523bdde947e7a305b8594b910a0fea2ab83c3c6d385", 61 | "sha256:e1cee4a79e4af22881164f218d4311f60074197fb707e082e803b61f6d137018" 62 | ], 63 | "index": "pypi", 64 | "version": "==1.2.0" 65 | }, 66 | "markupsafe": { 67 | "hashes": [ 68 | "sha256:0576fe974b40a400449768941d5d0858cc624e3249dfd1e0c33674e5c7ca7aed", 69 | "sha256:085fd3201e7b12809f9e6e9bc1e5c96a368c8523fad5afb02afe3c051ae4afcc", 70 | "sha256:090376d812fb6ac5f171e5938e82e7f2d7adc2b629101cec0db8b267815c85e2", 71 | "sha256:0b462104ba25f1ac006fdab8b6a01ebbfbce9ed37fd37fd4acd70c67c973e460", 72 | "sha256:137678c63c977754abe9086a3ec011e8fd985ab90631145dfb9294ad09c102a7", 73 | "sha256:1bea30e9bf331f3fef67e0a3877b2288593c98a21ccb2cf29b74c581a4eb3af0", 74 | "sha256:22152d00bf4a9c7c83960521fc558f55a1adbc0631fbb00a9471e097b19d72e1", 75 | "sha256:22731d79ed2eb25059ae3df1dfc9cb1546691cc41f4e3130fe6bfbc3ecbbecfa", 76 | "sha256:2298c859cfc5463f1b64bd55cb3e602528db6fa0f3cfd568d3605c50678f8f03", 77 | "sha256:28057e985dace2f478e042eaa15606c7efccb700797660629da387eb289b9323", 78 | "sha256:2e7821bffe00aa6bd07a23913b7f4e01328c3d5cc0b40b36c0bd81d362faeb65", 79 | "sha256:2ec4f2d48ae59bbb9d1f9d7efb9236ab81429a764dedca114f5fdabbc3788013", 80 | "sha256:340bea174e9761308703ae988e982005aedf427de816d1afe98147668cc03036", 81 | "sha256:40627dcf047dadb22cd25ea7ecfe9cbf3bbbad0482ee5920b582f3809c97654f", 82 | "sha256:40dfd3fefbef579ee058f139733ac336312663c6706d1163b82b3003fb1925c4", 83 | "sha256:4cf06cdc1dda95223e9d2d3c58d3b178aa5dacb35ee7e3bbac10e4e1faacb419", 84 | "sha256:50c42830a633fa0cf9e7d27664637532791bfc31c731a87b202d2d8ac40c3ea2", 85 | "sha256:55f44b440d491028addb3b88f72207d71eeebfb7b5dbf0643f7c023ae1fba619", 86 | "sha256:608e7073dfa9e38a85d38474c082d4281f4ce276ac0010224eaba11e929dd53a", 87 | "sha256:63ba06c9941e46fa389d389644e2d8225e0e3e5ebcc4ff1ea8506dce646f8c8a", 88 | "sha256:65608c35bfb8a76763f37036547f7adfd09270fbdbf96608be2bead319728fcd", 89 | "sha256:665a36ae6f8f20a4676b53224e33d456a6f5a72657d9c83c2aa00765072f31f7", 90 | "sha256:6d6607f98fcf17e534162f0709aaad3ab7a96032723d8ac8750ffe17ae5a0666", 91 | "sha256:7313ce6a199651c4ed9d7e4cfb4aa56fe923b1adf9af3b420ee14e6d9a73df65", 92 | "sha256:7668b52e102d0ed87cb082380a7e2e1e78737ddecdde129acadb0eccc5423859", 93 | "sha256:7df70907e00c970c60b9ef2938d894a9381f38e6b9db73c5be35e59d92e06625", 94 | "sha256:7e007132af78ea9df29495dbf7b5824cb71648d7133cf7848a2a5dd00d36f9ff", 95 | "sha256:835fb5e38fd89328e9c81067fd642b3593c33e1e17e2fdbf77f5676abb14a156", 96 | "sha256:8bca7e26c1dd751236cfb0c6c72d4ad61d986e9a41bbf76cb445f69488b2a2bd", 97 | "sha256:8db032bf0ce9022a8e41a22598eefc802314e81b879ae093f36ce9ddf39ab1ba", 98 | "sha256:99625a92da8229df6d44335e6fcc558a5037dd0a760e11d84be2260e6f37002f", 99 | "sha256:9cad97ab29dfc3f0249b483412c85c8ef4766d96cdf9dcf5a1e3caa3f3661cf1", 100 | "sha256:a4abaec6ca3ad8660690236d11bfe28dfd707778e2442b45addd2f086d6ef094", 101 | "sha256:a6e40afa7f45939ca356f348c8e23048e02cb109ced1eb8420961b2f40fb373a", 102 | 
"sha256:a6f2fcca746e8d5910e18782f976489939d54a91f9411c32051b4aab2bd7c513", 103 | "sha256:a806db027852538d2ad7555b203300173dd1b77ba116de92da9afbc3a3be3eed", 104 | "sha256:abcabc8c2b26036d62d4c746381a6f7cf60aafcc653198ad678306986b09450d", 105 | "sha256:b8526c6d437855442cdd3d87eede9c425c4445ea011ca38d937db299382e6fa3", 106 | "sha256:bb06feb762bade6bf3c8b844462274db0c76acc95c52abe8dbed28ae3d44a147", 107 | "sha256:c0a33bc9f02c2b17c3ea382f91b4db0e6cde90b63b296422a939886a7a80de1c", 108 | "sha256:c4a549890a45f57f1ebf99c067a4ad0cb423a05544accaf2b065246827ed9603", 109 | "sha256:ca244fa73f50a800cf8c3ebf7fd93149ec37f5cb9596aa8873ae2c1d23498601", 110 | "sha256:cf877ab4ed6e302ec1d04952ca358b381a882fbd9d1b07cccbfd61783561f98a", 111 | "sha256:d9d971ec1e79906046aa3ca266de79eac42f1dbf3612a05dc9368125952bd1a1", 112 | "sha256:da25303d91526aac3672ee6d49a2f3db2d9502a4a60b55519feb1a4c7714e07d", 113 | "sha256:e55e40ff0cc8cc5c07996915ad367fa47da6b3fc091fdadca7f5403239c5fec3", 114 | "sha256:f03a532d7dee1bed20bc4884194a16160a2de9ffc6354b3878ec9682bb623c54", 115 | "sha256:f1cd098434e83e656abf198f103a8207a8187c0fc110306691a2e94a78d0abb2", 116 | "sha256:f2bfb563d0211ce16b63c7cb9395d2c682a23187f54c3d79bfec33e6705473c6", 117 | "sha256:f8ffb705ffcf5ddd0e80b65ddf7bed7ee4f5a441ea7d3419e861a12eaf41af58" 118 | ], 119 | "markers": "python_version >= '3.7'", 120 | "version": "==2.1.2" 121 | }, 122 | "numexpr": { 123 | "hashes": [ 124 | "sha256:05b97b19e864a5d1a0b106933b1637233a2444fd375685bead264a818f847ef2", 125 | "sha256:0732c9989bff8568ee78fa461f3698166d4ac79363860be22ff49eae1dcd15e7", 126 | "sha256:23718ac5f2ebae995f5899509624781b375da568f2b645b5d1fd6dbb17f41a56", 127 | "sha256:24cdb8c0e93f31387a4c2ddd09a687874c006e6139fd68bcf77b96e51d17cb01", 128 | "sha256:2e14b44a79030fbe25f16393162a4d21ced14056fac49ff73856f661a78db731", 129 | "sha256:3daa55515ee3cb40bf5ab8263c0c13fff8d484d64d107a9c414e8ca151dc08a6", 130 | "sha256:43616529f9b7d1afc83386f943dc66c4da5e052f00217ba7e3ad8dd1b5f3a825", 131 | "sha256:4527a0a7b04f858a73c348c9c4ce8441b7a54965db74a32ba808c51d9d53b7cd", 132 | "sha256:51277a530a353e0f94665b44615249d7e7075f0c73f78d4743da632fc44bc648", 133 | "sha256:5223a519f48754dd350723d9fbcadbcd0476881bc954a281a09a6538ecabfc27", 134 | "sha256:5d6dbf050a9b8ebff0b7706ebeaf1cd57d64ef4dfe61aef3790851b481daf6b5", 135 | "sha256:5f4122bd58aa4e4891814c2f72bd47b1cdb202c9d863ea96c5394dffb72a16e2", 136 | "sha256:602df9b5c500d0a887dc96b4cfd16fb60ae7ef39ccd6f013f4df2ee11ae70553", 137 | "sha256:618259287b8b81a352a7d088ad03fe3b393a842ccb45f0b3cfc6a712d41b7595", 138 | "sha256:74df157ab4577bfc83c14f4e39d14781b06ade5406d3efef049f90c88d8c28ea", 139 | "sha256:785065819ce98e3d3dd853794244e0de190d7ba36ab42c8fd79e0e9cd40de7af", 140 | "sha256:7ab40e2b438f4ea2ea8234c63639cdf5072cdb29d0ac521307854efe0281a567", 141 | "sha256:833a363c86266424349467b53f4060f77aaa7ec03c1e6f38c54e69c65ceebf30", 142 | "sha256:8b76bcca930cbf0db0fe98b6a51d6286dff77d525dad670cb7750e29a138d434", 143 | "sha256:8fc23a49f4266c24a23310c0cb92ff54c4b4f535635f90372b3a2d5cb1f83329", 144 | "sha256:90ea6d5813e1906bb203ef220a600b30d83e75aea2607a7e7037cceae9e93346", 145 | "sha256:97753d17d1ea39e082b1907b99b6cb63cac7d1dfa512d2ff5079eb7bfab1ea88", 146 | "sha256:99472731bc1111f5d73285dd2a4c228b5bfb176f785a567872e0fbfec6584f2b", 147 | "sha256:a3f1cec8657bd3920869a2ea27f98d68ac3000334f366d844a9670ae671fe4bd", 148 | "sha256:a8e0e48d72391543b68d0471fac2e31c614efdce4036e2a0a8a182fde1edb0e0", 149 | "sha256:aae4ce158da53ebc47df053de90fed9d0d51fa0df8cc481abc8a901ea4f0cec7", 150 | 
"sha256:b0a9124a66a61b05ea84b832358d6aa5561c30e69b4dcaea819b296f4f025f89", 151 | "sha256:c2605e5665b0d7362e0d2b92683387c12e15c7440daf702a7637f7502a967810", 152 | "sha256:c9218aeb76717768f617362b72a87e9219da95ba7cdec0732ccecc4a4719124c", 153 | "sha256:c978c49bd9dded6a4ba6b3501e3a34e3aba9312cbb7d800bed7ac6fcd2d5949d", 154 | "sha256:d14ae09318ad86579e35aacf1596c83d5db1139cd68615967ee23605e11f5d82", 155 | "sha256:d423441593a952ac56d1f774068b81fb22f514fb68873c066578345a6af74c0d", 156 | "sha256:dc707486b1f3dda18a39bc4d06a0a09d3c0ea47bd6b99fdb98adb26d1277253f", 157 | "sha256:dfdca3d1f4c83fa8fd3ee7573110efd13e838543896641b89367622ec6a67eb4", 158 | "sha256:e000570a6a704c594832ff4fc45f18864b721b7b444a185b365dbb03d3fe3abb", 159 | "sha256:e985026e64350dd59fd91a09bc364edf706d58b12e01362ddfa63829878bd434", 160 | "sha256:eeeb6325df6cf3f3ab7d9dbabf3bc03ac88b7e2f2aed21419c31e23c3048dce1", 161 | "sha256:f9df0a74d39616fd011071c5850418f244bac414f24ed55c00dcf3c5385e8374" 162 | ], 163 | "index": "pypi", 164 | "version": "==2.7.3" 165 | }, 166 | "numpy": { 167 | "hashes": [ 168 | "sha256:0cfe07133fd00b27edee5e6385e333e9eeb010607e8a46e1cd673f05f8596595", 169 | "sha256:11a1f3816ea82eed4178102c56281782690ab5993251fdfd75039aad4d20385f", 170 | "sha256:2762331de395739c91f1abb88041f94a080cb1143aeec791b3b223976228af3f", 171 | "sha256:283d9de87c0133ef98f93dfc09fad3fb382f2a15580de75c02b5bb36a5a159a5", 172 | "sha256:3d22662b4b10112c545c91a0741f2436f8ca979ab3d69d03d19322aa970f9695", 173 | "sha256:41388e32e40b41dd56eb37fcaa7488b2b47b0adf77c66154d6b89622c110dfe9", 174 | "sha256:42c16cec1c8cf2728f1d539bd55aaa9d6bb48a7de2f41eb944697293ef65a559", 175 | "sha256:47ee7a839f5885bc0c63a74aabb91f6f40d7d7b639253768c4199b37aede7982", 176 | "sha256:5a311ee4d983c487a0ab546708edbdd759393a3dc9cd30305170149fedd23c88", 177 | "sha256:5dc65644f75a4c2970f21394ad8bea1a844104f0fe01f278631be1c7eae27226", 178 | "sha256:6ed0d073a9c54ac40c41a9c2d53fcc3d4d4ed607670b9e7b0de1ba13b4cbfe6f", 179 | "sha256:76ba7c40e80f9dc815c5e896330700fd6e20814e69da9c1267d65a4d051080f1", 180 | "sha256:818b9be7900e8dc23e013a92779135623476f44a0de58b40c32a15368c01d471", 181 | "sha256:a024181d7aef0004d76fb3bce2a4c9f2e67a609a9e2a6ff2571d30e9976aa383", 182 | "sha256:a955e4128ac36797aaffd49ab44ec74a71c11d6938df83b1285492d277db5397", 183 | "sha256:a97a954a8c2f046d3817c2bce16e3c7e9a9c2afffaf0400f5c16df5172a67c9c", 184 | "sha256:a97e82c39d9856fe7d4f9b86d8a1e66eff99cf3a8b7ba48202f659703d27c46f", 185 | "sha256:b55b953a1bdb465f4dc181758570d321db4ac23005f90ffd2b434cc6609a63dd", 186 | "sha256:bb02929b0d6bfab4c48a79bd805bd7419114606947ec8284476167415171f55b", 187 | "sha256:bece0a4a49e60e472a6d1f70ac6cdea00f9ab80ff01132f96bd970cdd8a9e5a9", 188 | "sha256:e41e8951749c4b5c9a2dc5fdbc1a4eec6ab2a140fdae9b460b0f557eed870f4d", 189 | "sha256:f71d57cc8645f14816ae249407d309be250ad8de93ef61d9709b45a0ddf4050c" 190 | ], 191 | "index": "pypi", 192 | "version": "==1.22.0" 193 | }, 194 | "pandas": { 195 | "hashes": [ 196 | "sha256:003ba92db58b71a5f8add604a17a059f3068ef4e8c0c365b088468d0d64935fd", 197 | "sha256:10e10a2527db79af6e830c3d5842a4d60383b162885270f8cffc15abca4ba4a9", 198 | "sha256:22808afb8f96e2269dcc5b846decacb2f526dd0b47baebc63d913bf847317c8f", 199 | "sha256:2d1dc09c0013d8faa7474574d61b575f9af6257ab95c93dcf33a14fd8d2c1bab", 200 | "sha256:35c77609acd2e4d517da41bae0c11c70d31c87aae8dd1aabd2670906c6d2c143", 201 | "sha256:372d72a3d8a5f2dbaf566a5fa5fa7f230842ac80f29a931fb4b071502cf86b9a", 202 | "sha256:42493f8ae67918bf129869abea8204df899902287a7f5eaf596c8e54e0ac7ff4", 203 | 
"sha256:4acc28364863127bca1029fb72228e6f473bb50c32e77155e80b410e2068eeac", 204 | "sha256:5298a733e5bfbb761181fd4672c36d0c627320eb999c59c65156c6a90c7e1b4f", 205 | "sha256:5ba0aac1397e1d7b654fccf263a4798a9e84ef749866060d19e577e927d66e1b", 206 | "sha256:9707bdc1ea9639c886b4d3be6e2a45812c1ac0c2080f94c31b71c9fa35556f9b", 207 | "sha256:a2aa18d3f0b7d538e21932f637fbfe8518d085238b429e4790a35e1e44a96ffc", 208 | "sha256:a388960f979665b447f0847626e40f99af8cf191bce9dc571d716433130cb3a7", 209 | "sha256:a51528192755f7429c5bcc9e80832c517340317c861318fea9cea081b57c9afd", 210 | "sha256:b528e126c13816a4374e56b7b18bfe91f7a7f6576d1aadba5dee6a87a7f479ae", 211 | "sha256:c1aa4de4919358c5ef119f6377bc5964b3a7023c23e845d9db7d9016fa0c5b1c", 212 | "sha256:c2646458e1dce44df9f71a01dc65f7e8fa4307f29e5c0f2f92c97f47a5bf22f5", 213 | "sha256:c2f44425594ae85e119459bb5abb0748d76ef01d9c08583a667e3339e134218e", 214 | "sha256:d47750cf07dee6b55d8423471be70d627314277976ff2edd1381f02d52dbadf9", 215 | "sha256:d99d2350adb7b6c3f7f8f0e5dfb7d34ff8dd4bc0a53e62c445b7e43e163fce63", 216 | "sha256:dd324f8ee05925ee85de0ea3f0d66e1362e8c80799eb4eb04927d32335a3e44a", 217 | "sha256:eaca36a80acaacb8183930e2e5ad7f71539a66805d6204ea88736570b2876a7b", 218 | "sha256:f567e972dce3bbc3a8076e0b675273b4a9e8576ac629149cf8286ee13c259ae5", 219 | "sha256:fe48e4925455c964db914b958f6e7032d285848b7538a5e1b19aeb26ffaea3ec" 220 | ], 221 | "index": "pypi", 222 | "version": "==1.3.4" 223 | }, 224 | "pyldavis": { 225 | "editable": true, 226 | "path": "." 227 | }, 228 | "python-dateutil": { 229 | "hashes": [ 230 | "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", 231 | "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" 232 | ], 233 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 234 | "version": "==2.8.2" 235 | }, 236 | "pytz": { 237 | "hashes": [ 238 | "sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0", 239 | "sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a" 240 | ], 241 | "version": "==2022.7.1" 242 | }, 243 | "reproducer": { 244 | "editable": true, 245 | "path": "." 246 | }, 247 | "sanitized-package": { 248 | "editable": true, 249 | "path": "." 
250 | }, 251 | "scikit-learn": { 252 | "hashes": [ 253 | "sha256:02aee3b257617da0ec98dee9572b10523dc00c25b68c195ddf100c1a93b1854b", 254 | "sha256:059c5be0c0365321ddbcac7abf0db806fad8ecb64ee6c7cbcd58313c7d61634d", 255 | "sha256:116e05fd990d9b363fc29bd3699ec2117d7da9088f6ca9a90173b240c5a063f1", 256 | "sha256:11a57405c1c3514227d0c6a0bee561c94cd1284b41e236f7a1d76b3975f77593", 257 | "sha256:32d941f12fd7e245f01da2b82943c5ce6f1133fa5375eb80caa51457532b3e7e", 258 | "sha256:46248cc6a8b72490f723c73ff2e65e62633d14cafe9d2df3a7b3f87d332a6f7e", 259 | "sha256:515b227f01f569145dc9f86e56f4cea9f00a613fc4d074bbfc0a92ca00bff467", 260 | "sha256:538f3a85c4980c7572f3e754f0ba8489363976ef3e7f6a94e8f1af5ae45f6f6a", 261 | "sha256:53bb7c605427ab187869d7a05cd3f524a3015a90e351c1788fc3a662e7f92b69", 262 | "sha256:59b1d6df8724003fa16b7365a3b43449ee152aa6e488dd7a19f933640bb2d7fb", 263 | "sha256:62ce4e3ddb6e6e9dcdb3e5ac7f0575dbaf56f79ce2b2edee55192b12b52df5be", 264 | "sha256:648f4dbfdd0a1b45bf6e2e4afe3f431774c55dee05e2d28f8394d6648296f373", 265 | "sha256:944f47b2d881b9d24aee40d643bfdc4bd2b6dc3d25b62964411c6d8882f940a1", 266 | "sha256:a51fdbc116974d9715957366df73e5ec6f0a7a2afa017864c2e5f5834e6f494d", 267 | "sha256:a800665527c1a63f7395a0baae3c89b0d97b54d2c23769c1c9879061bb80bc19", 268 | "sha256:ac2ca9dbb754d61cfe1c83ba8483498ef951d29b93ec09d6f002847f210a99da", 269 | "sha256:bd78a2442c948536f677e2744917c37cff014559648102038822c23863741c27", 270 | "sha256:c604a813df8e7d6dfca3ae0db0a8fd7e5dff4ea9d94081ab263c81bf0b61ab4b", 271 | "sha256:c6b9510fd2e1642314efb7aa951a0d05d963f3523e01c30b2dadde2395ebe6b4", 272 | "sha256:ebbe4275556d3c02707bd93ae8b96d9651acd4165126e0ae64b336afa2a6dcb1", 273 | "sha256:ee59da47e18b703f6de17d5d51b16ce086c50969d5a83db5217f0ae9372de232", 274 | "sha256:fb7214103f6c36c1371dd8c166897e3528264a28f2e2e42573ba8c61ed4d7142", 275 | "sha256:fc60e0371e521995a6af2ef3f5d911568506124c272889b318b8b6e497251231", 276 | "sha256:fc75f81571137b39f9b31766e15a0e525331637e7fe8f8000a3fbfba7da3add9", 277 | "sha256:fecb5102f0a36c16c1361ec519a7bb0260776ef40e17393a81f530569c916a7b" 278 | ], 279 | "index": "pypi", 280 | "version": "==1.0.1" 281 | }, 282 | "scipy": { 283 | "hashes": [ 284 | "sha256:1437073f1d4664990879aa8f9547524764372e0fef84a077be4b19e82bba7a8d", 285 | "sha256:17fd991a275e4283453f89d404209aa92059ac68d76d804b4bc1716a3742e1b5", 286 | "sha256:1ea6233f5a365cb7945b4304bd06323ece3ece85d6a3fa8598d2f53e513467c9", 287 | "sha256:2d25272c03ee3c0fe5e0dff1bb7889280bb6c9e1766fa9c7bde81ad8a5f78694", 288 | "sha256:30bdda199667e74b50208a793eb1ba47a04e5e3fa16f5ff06c6f7969ae78e4da", 289 | "sha256:359b60a0cccd17723b9d5e329a5212a710e771a3ddde800e472fb93732756c46", 290 | "sha256:39f838ea5ce8da868785193d88d05cf5a6d5c390804ec99de29a28e1dcdd53e6", 291 | "sha256:4d175ba93e00d8eef8f7cd70d4d88a9106a86800c82ea03cf2268c36d6545483", 292 | "sha256:5273d832fb9cd5724ee0d335c16a903b923441107dd973d27fc4293075a9f4e3", 293 | "sha256:54951f51d731c832b1b8885e0a92e89f33d087de7e40d02078bf0d49c7cbdbb5", 294 | "sha256:74f518ce542533054695f743e4271cb8986b63f95bb51d70fcee4f3929cbff7d", 295 | "sha256:7b1d0f5f524518f1a86f288443528e4ff4a739c0966db663af4129b7ac7849f8", 296 | "sha256:82c5befebf54d799d77e5f0205c03030f57f69ba2541baa44d2e6ad138c28cd3", 297 | "sha256:8482c8e45857ab0a5446eb7460d2307a27cbbe659d6d2257820c6d6eb950fd0f", 298 | "sha256:87cf3964db0f1cce17aeed5bfc1b89a6b4b07dbfc48e50d21fa3549e00456803", 299 | "sha256:8b5726a0fedeaa6beb1095e4466998bdd1d1e960b28db9b5a16c89cbd7b2ebf1", 300 | "sha256:97eb573e361a73a553b915dc195c6f72a08249964b1a33f157f9659f3b6210d1", 
301 | "sha256:a80eb01c43fd98257ec7a49ff5cec0edba32031b5f86503f55399a48cb2c5379", 302 | "sha256:cac71d5476a6f56b50459da21f6221707e0051ebd428b2137db32ef4a43bb15e", 303 | "sha256:d86abd1ddf421dea5e9cebfeb4de0d205b3dc04e78249afedba9c6c3b2227ff2", 304 | "sha256:dc2d1bf41294e63c7302bf499973ac0c7f73c93c01763db43055f6525234bf11", 305 | "sha256:e08b81fcd9bf98740b58dc6fdd7879e33a64dcb682201c1135f7d4a75216bb05", 306 | "sha256:e3efe7ef75dfe627b354ab0af0dbc918eadee97cc80ff1aabea6d3e01114ebdd", 307 | "sha256:fa2dbabaaecdb502641b0b3c00dec05fb475ae48655c66da16c9ed24eda1e711" 308 | ], 309 | "index": "pypi", 310 | "version": "==1.7.2" 311 | }, 312 | "six": { 313 | "hashes": [ 314 | "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", 315 | "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" 316 | ], 317 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 318 | "version": "==1.16.0" 319 | }, 320 | "smart-open": { 321 | "hashes": [ 322 | "sha256:b4c9ae193ad6d3e7add50944b86afa0d150bd821ab8ec21edb26d9a06b66f6a8", 323 | "sha256:d5238825fe9a9340645fac3d75b287c08fbb99fb2b422477de781c9f5f09e019" 324 | ], 325 | "markers": "python_version >= '3.6' and python_version < '4.0'", 326 | "version": "==6.3.0" 327 | }, 328 | "threadpoolctl": { 329 | "hashes": [ 330 | "sha256:8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b", 331 | "sha256:a335baacfaa4400ae1f0d8e3a58d6674d2f8828e3716bb2802c44955ad391380" 332 | ], 333 | "markers": "python_version >= '3.6'", 334 | "version": "==3.1.0" 335 | } 336 | }, 337 | "develop": { 338 | "attrs": { 339 | "hashes": [ 340 | "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836", 341 | "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99" 342 | ], 343 | "markers": "python_version >= '3.6'", 344 | "version": "==22.2.0" 345 | }, 346 | "importlib-metadata": { 347 | "hashes": [ 348 | "sha256:53ccfd5c134223e497627b9815d5030edf77d2ed573922f7a0b8f8bb81a1c100", 349 | "sha256:75bdec14c397f528724c1bfd9709d660b33a4d2e77387a3358f20b848bb5e5fb" 350 | ], 351 | "markers": "python_version < '3.8'", 352 | "version": "==4.8.2" 353 | }, 354 | "iniconfig": { 355 | "hashes": [ 356 | "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", 357 | "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" 358 | ], 359 | "markers": "python_version >= '3.7'", 360 | "version": "==2.0.0" 361 | }, 362 | "packaging": { 363 | "hashes": [ 364 | "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", 365 | "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" 366 | ], 367 | "markers": "python_version >= '3.7'", 368 | "version": "==23.0" 369 | }, 370 | "pluggy": { 371 | "hashes": [ 372 | "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", 373 | "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3" 374 | ], 375 | "markers": "python_version >= '3.6'", 376 | "version": "==1.0.0" 377 | }, 378 | "py": { 379 | "hashes": [ 380 | "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719", 381 | "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378" 382 | ], 383 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 384 | "version": "==1.11.0" 385 | }, 386 | "pyparsing": { 387 | "hashes": [ 388 | "sha256:04ff808a5b90911829c55c4e26f75fa5ca8a2f5f36aa3a51f68e27033341d3e4", 389 | 
"sha256:d9bdec0013ef1eb5a84ab39a3b3868911598afa494f5faa038647101504e2b81" 390 | ], 391 | "markers": "python_version >= '3.6'", 392 | "version": "==3.0.6" 393 | }, 394 | "pytest": { 395 | "hashes": [ 396 | "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89", 397 | "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134" 398 | ], 399 | "index": "pypi", 400 | "version": "==6.2.5" 401 | }, 402 | "toml": { 403 | "hashes": [ 404 | "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", 405 | "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" 406 | ], 407 | "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", 408 | "version": "==0.10.2" 409 | }, 410 | "typing-extensions": { 411 | "hashes": [ 412 | "sha256:2cdf80e4e04866a9b3689a51869016d36db0814d84b8d8a568d22781d45d27ed", 413 | "sha256:829704698b22e13ec9eaf959122315eabb370b0884400e9818334d8b677023d9" 414 | ], 415 | "markers": "python_version < '3.8'", 416 | "version": "==4.0.0" 417 | }, 418 | "zipp": { 419 | "hashes": [ 420 | "sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832", 421 | "sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc" 422 | ], 423 | "markers": "python_version >= '3.6'", 424 | "version": "==3.6.0" 425 | } 426 | } 427 | } 428 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pyLDAvis 2 | ======== 3 | 4 | Python library for interactive topic model visualization. 5 | This is a port of the fabulous `R package `_ by `Carson Sievert `__ and `Kenny Shirley `__. 6 | 7 | .. figure:: http://www.kennyshirley.com/figures/ldavis-pic.png 8 | :alt: LDAvis icon 9 | 10 | **pyLDAvis** is designed to help users interpret the topics in a topic model that has been fit to a corpus of text data. The package extracts information from a fitted LDA topic model to inform an interactive web-based visualization. 11 | 12 | The visualization is intended to be used within an IPython notebook but can also be saved to a stand-alone HTML file for easy sharing. 13 | 14 | Note: LDA stands for `latent Dirichlet allocation `_. 15 | 16 | |version status| |build status| |docs| 17 | 18 | Installation 19 | ~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | - Stable version using pip: 22 | 23 | :: 24 | 25 | pip install pyldavis 26 | 27 | - Development version on GitHub 28 | 29 | Clone the repository and run ``python setup.py`` 30 | 31 | Usage 32 | ~~~~~~~~~~~~~~~~~~~~~~ 33 | 34 | The best way to learn how to use **pyLDAvis** is to see it in action. 35 | Check out this `notebook for an overview `__. 36 | Refer to the `documentation `__ for details. 37 | 38 | For a concise explanation of the visualization see this 39 | `vignette `__ from the LDAvis R package. 40 | 41 | Video demos 42 | ~~~~~~~~~~~ 43 | 44 | Ben Mabey walked through the visualization in this short talk using a Hacker News corpus: 45 | 46 | - `Visualizing Topic Models `__ 47 | - `Notebook and visualization used in the demo `__ 48 | - `Slide deck `__ 49 | 50 | 51 | `Carson Sievert `__ created a video demoing the R package. 
The visualization is the same and so it applies equally to pyLDAvis: 52 | 53 | - `Visualizing & Exploring the Twenty Newsgroup Data `__ 54 | 55 | More documentation 56 | ~~~~~~~~~~~~~~~~~~ 57 | 58 | To read about the methodology behind pyLDAvis, see `the original 59 | paper `__, 60 | which was presented at the `2014 ACL Workshop on Interactive Language 61 | Learning, Visualization, and 62 | Interfaces `__ in Baltimore 63 | on June 27, 2014. 64 | 65 | 66 | 67 | 68 | .. |version status| image:: https://img.shields.io/pypi/v/pyLDAvis.svg 69 | :target: https://pypi.python.org/pypi/pyLDAvis 70 | .. |build status| image:: https://travis-ci.org/bmabey/pyLDAvis.png?branch=master 71 | :target: https://travis-ci.org/bmabey/pyLDAvis 72 | .. |docs| image:: https://readthedocs.org/projects/pyldavis/badge/?version=latest 73 | :target: https://pyLDAvis.readthedocs.org 74 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pyLDAvis.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pyLDAvis.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pyLDAvis" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pyLDAvis" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # pyLDAvis documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Jul 9 22:26:36 2013. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 
9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | import mock 20 | 21 | # If extensions (or modules to document with autodoc) are in another directory, 22 | # add these directories to sys.path here. If the directory is relative to the 23 | # documentation root, use os.path.abspath to make it absolute, like shown here. 24 | #sys.path.insert(0, os.path.abspath('.')) 25 | 26 | # Get the project root dir, which is the parent dir of this 27 | cwd = os.getcwd() 28 | project_root = os.path.dirname(cwd) 29 | 30 | # Insert the project root dir as the first element in the PYTHONPATH. 31 | # This lets us ensure that the source package is imported, and that its 32 | # version is used. 33 | sys.path.insert(0, project_root) 34 | 35 | MOCK_MODULES = ['numpy','joblib', 'funcy', 'scipy', 'scipy.stats', 'scipy.spatial', 36 | 'scipy.spatial.distance', 'pandas', 'skbio', 'skbio.stats', 37 | 'skbio.stats.distance', 'skbio.stats.ordination'] 38 | for mod_name in MOCK_MODULES: 39 | sys.modules[mod_name] = mock.Mock() 40 | 41 | import pyLDAvis 42 | 43 | 44 | 45 | # -- General configuration --------------------------------------------- 46 | 47 | # If your documentation needs a minimal Sphinx version, state it here. 48 | #needs_sphinx = '1.0' 49 | 50 | # Add any Sphinx extension module names here, as strings. They can be 51 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 52 | extensions = [ 53 | 'sphinx.ext.autodoc', 54 | 'sphinx.ext.autosummary', 55 | 'sphinx.ext.doctest', 56 | 'sphinx.ext.coverage', 57 | 'sphinx.ext.viewcode', 58 | 'numpydoc'] 59 | 60 | # Add any paths that contain templates here, relative to this directory. 61 | templates_path = ['_templates'] 62 | 63 | # The suffix of source filenames. 64 | source_suffix = '.rst' 65 | 66 | # The encoding of source files. 67 | #source_encoding = 'utf-8-sig' 68 | 69 | # The master toctree document. 70 | master_doc = 'index' 71 | 72 | # General information about the project. 73 | project = u'pyLDAvis' 74 | copyright = u'2015, Ben Mabey' 75 | 76 | # The version info for the project you're documenting, acts as replacement 77 | # for |version| and |release|, also used in various other places throughout 78 | # the built documents. 79 | # 80 | # The short X.Y version. 81 | version = pyLDAvis.__version__ 82 | # The full version, including alpha/beta/rc tags. 83 | release = pyLDAvis.__version__ 84 | 85 | # The language for content autogenerated by Sphinx. Refer to documentation 86 | # for a list of supported languages. 87 | #language = None 88 | 89 | # There are two options for replacing |today|: either, you set today to 90 | # some non-false value, then it is used: 91 | #today = '' 92 | # Else, today_fmt is used as the format for a strftime call. 93 | #today_fmt = '%B %d, %Y' 94 | 95 | # List of patterns, relative to source directory, that match files and 96 | # directories to ignore when looking for source files. 97 | exclude_patterns = ['_build'] 98 | 99 | # The reST default role (used for this markup: `text`) to use for all 100 | # documents. 101 | #default_role = None 102 | 103 | # If true, '()' will be appended to :func: etc. cross-reference text. 104 | #add_function_parentheses = True 105 | 106 | # If true, the current module name will be prepended to all description 107 | # unit titles (such as .. function::). 
108 | #add_module_names = True 109 | 110 | # If true, sectionauthor and moduleauthor directives will be shown in the 111 | # output. They are ignored by default. 112 | #show_authors = False 113 | 114 | # The name of the Pygments (syntax highlighting) style to use. 115 | pygments_style = 'sphinx' 116 | 117 | # A list of ignored prefixes for module index sorting. 118 | #modindex_common_prefix = [] 119 | 120 | # If true, keep warnings as "system message" paragraphs in the built 121 | # documents. 122 | #keep_warnings = False 123 | 124 | 125 | # -- Options for HTML output ------------------------------------------- 126 | 127 | # The theme to use for HTML and HTML Help pages. See the documentation for 128 | # a list of builtin themes. 129 | html_theme = 'default' 130 | 131 | # Theme options are theme-specific and customize the look and feel of a 132 | # theme further. For a list of options available for each theme, see the 133 | # documentation. 134 | #html_theme_options = {} 135 | 136 | # Add any paths that contain custom themes here, relative to this directory. 137 | #html_theme_path = [] 138 | 139 | # The name for this set of Sphinx documents. If None, it defaults to 140 | # " v documentation". 141 | #html_title = None 142 | 143 | # A shorter title for the navigation bar. Default is the same as 144 | # html_title. 145 | #html_short_title = None 146 | 147 | # The name of an image file (relative to this directory) to place at the 148 | # top of the sidebar. 149 | #html_logo = None 150 | 151 | # The name of an image file (within the static path) to use as favicon 152 | # of the docs. This file should be a Windows icon file (.ico) being 153 | # 16x16 or 32x32 pixels large. 154 | #html_favicon = None 155 | 156 | # Add any paths that contain custom static files (such as style sheets) 157 | # here, relative to this directory. They are copied after the builtin 158 | # static files, so a file named "default.css" will overwrite the builtin 159 | # "default.css". 160 | html_static_path = ['_static'] 161 | 162 | # If not '', a 'Last updated on:' timestamp is inserted at every page 163 | # bottom, using the given strftime format. 164 | #html_last_updated_fmt = '%b %d, %Y' 165 | 166 | # If true, SmartyPants will be used to convert quotes and dashes to 167 | # typographically correct entities. 168 | #html_use_smartypants = True 169 | 170 | # Custom sidebar templates, maps document names to template names. 171 | #html_sidebars = {} 172 | 173 | # Additional templates that should be rendered to pages, maps page names 174 | # to template names. 175 | #html_additional_pages = {} 176 | 177 | # If false, no module index is generated. 178 | #html_domain_indices = True 179 | 180 | # If false, no index is generated. 181 | #html_use_index = True 182 | 183 | # If true, the index is split into individual pages for each letter. 184 | #html_split_index = False 185 | 186 | # If true, links to the reST sources are added to the pages. 187 | #html_show_sourcelink = True 188 | 189 | # If true, "Created using Sphinx" is shown in the HTML footer. 190 | # Default is True. 191 | #html_show_sphinx = True 192 | 193 | # If true, "(C) Copyright ..." is shown in the HTML footer. 194 | # Default is True. 195 | #html_show_copyright = True 196 | 197 | # If true, an OpenSearch description file will be output, and all pages 198 | # will contain a tag referring to it. The value of this option 199 | # must be the base URL from which the finished HTML is served. 
200 | #html_use_opensearch = '' 201 | 202 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 203 | #html_file_suffix = None 204 | 205 | # Output file base name for HTML help builder. 206 | htmlhelp_basename = 'pyLDAvisdoc' 207 | 208 | 209 | # -- Options for LaTeX output ------------------------------------------ 210 | 211 | latex_elements = { 212 | # The paper size ('letterpaper' or 'a4paper'). 213 | #'papersize': 'letterpaper', 214 | 215 | # The font size ('10pt', '11pt' or '12pt'). 216 | #'pointsize': '10pt', 217 | 218 | # Additional stuff for the LaTeX preamble. 219 | #'preamble': '', 220 | } 221 | 222 | # Grouping the document tree into LaTeX files. List of tuples 223 | # (source start file, target name, title, author, documentclass 224 | # [howto/manual]). 225 | latex_documents = [ 226 | ('index', 'pyLDAvis.tex', 227 | u'pyLDAvis Documentation', 228 | u'Ben Mabey', 'manual'), 229 | ] 230 | 231 | # The name of an image file (relative to this directory) to place at 232 | # the top of the title page. 233 | #latex_logo = None 234 | 235 | # For "manual" documents, if this is true, then toplevel headings 236 | # are parts, not chapters. 237 | #latex_use_parts = False 238 | 239 | # If true, show page references after internal links. 240 | #latex_show_pagerefs = False 241 | 242 | # If true, show URL addresses after external links. 243 | #latex_show_urls = False 244 | 245 | # Documents to append as an appendix to all manuals. 246 | #latex_appendices = [] 247 | 248 | # If false, no module index is generated. 249 | #latex_domain_indices = True 250 | 251 | 252 | # -- Options for manual page output ------------------------------------ 253 | 254 | # One entry per manual page. List of tuples 255 | # (source start file, name, description, authors, manual section). 256 | man_pages = [ 257 | ('index', 'pyLDAvis', 258 | u'pyLDAvis Documentation', 259 | [u'Ben Mabey'], 1) 260 | ] 261 | 262 | # If true, show URL addresses after external links. 263 | #man_show_urls = False 264 | 265 | 266 | # -- Options for Texinfo output ---------------------------------------- 267 | 268 | # Grouping the document tree into Texinfo files. List of tuples 269 | # (source start file, target name, title, author, 270 | # dir menu entry, description, category) 271 | texinfo_documents = [ 272 | ('index', 'pyLDAvis', 273 | u'pyLDAvis Documentation', 274 | u'Ben Mabey', 275 | 'pyLDAvis', 276 | 'One line description of project.', 277 | 'Miscellaneous'), 278 | ] 279 | 280 | # Documents to append as an appendix to all manuals. 281 | #texinfo_appendices = [] 282 | 283 | # If false, no module index is generated. 284 | #texinfo_domain_indices = True 285 | 286 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 287 | #texinfo_show_urls = 'footnote' 288 | 289 | # If true, do not generate a @detailmenu in the "Top" node's menu. 290 | #texinfo_no_detailmenu = False 291 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../HISTORY.rst 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
pyLDAvis documentation master file, created by 2 | sphinx-quickstart on Tue Jul 9 22:26:36 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to pyLDAvis's documentation! 7 | ====================================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | readme 15 | installation 16 | usage 17 | contributing 18 | authors 19 | modules/API 20 | history 21 | 22 | Indices and tables 23 | ================== 24 | 25 | * :ref:`genindex` 26 | * :ref:`modindex` 27 | * :ref:`search` 28 | 29 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 
76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pyLDAvis.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pyLDAvis.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. 
The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/modules/API.rst: -------------------------------------------------------------------------------- 1 | API documentation 2 | ================= 3 | 4 | .. automodule:: pyLDAvis 5 | :members: 6 | 7 | .. automodule:: pyLDAvis.gensim 8 | :members: 9 | 10 | .. automodule:: pyLDAvis.graphlab 11 | :members: 12 | 13 | .. automodule:: pyLDAvis.utils 14 | :members: 15 | 16 | .. automodule:: pyLDAvis.urls 17 | :members: 18 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | -------------------------------------------------------------------------------- /notebooks/data/ap_input.json: -------------------------------------------------------------------------------- 1 | ../../tests/data/ap_input.json -------------------------------------------------------------------------------- /notebooks/data/jeopardy_input.json: -------------------------------------------------------------------------------- 1 | ../../tests/data/jeopardy_input.json -------------------------------------------------------------------------------- /notebooks/data/movie_reviews_input.json: -------------------------------------------------------------------------------- 1 | ../../tests/data/movie_reviews_input.json -------------------------------------------------------------------------------- /notebooks/pyLDAvis: -------------------------------------------------------------------------------- 1 | ../pyLDAvis -------------------------------------------------------------------------------- /pyLDAvis/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Topic Models (e.g. 
LDA) visualization using D3 4 | ============================================= 5 | 6 | Functions: General Use 7 | ---------------------- 8 | :func:`prepare` 9 | transform and prepare a LDA model's data for visualization 10 | 11 | :func:`prepared_data_to_html` 12 | convert prepared data to an html string 13 | 14 | :func:`show` 15 | launch a web server to view the visualization 16 | 17 | :func:`save_html` 18 | save a visualization to a standalone html file 19 | 20 | :func:`save_json` 21 | save the visualization JSON data of to a file 22 | 23 | 24 | Functions: IPython Notebook 25 | --------------------------- 26 | :func:`display` 27 | display a figure in an IPython notebook 28 | 29 | :func:`enable_notebook` 30 | enable automatic D3 display of prepared model data in the IPython notebook. 31 | 32 | :func:`disable_notebook` 33 | disable automatic D3 display of prepared model data in the IPython notebook. 34 | """ 35 | 36 | __all__ = ["__version__", 37 | "prepare", "js_PCoA", 38 | "PreparedData", "prepared_data_to_html", 39 | "display", "show", "save_html", "save_json", 40 | "enable_notebook", "disable_notebook"] 41 | 42 | __version__ = "3.4.1" 43 | 44 | from pyLDAvis._display import * 45 | from pyLDAvis._prepare import prepare, js_PCoA, PreparedData 46 | -------------------------------------------------------------------------------- /pyLDAvis/_display.py: -------------------------------------------------------------------------------- 1 | # this file is largely based on https://github.com/jakevdp/mpld3/blob/master/mpld3/_display.py 2 | # Copyright (c) 2013, Jake Vanderplas 3 | # It was adapted for pyLDAvis by Ben Mabey 4 | import warnings 5 | import random 6 | import json 7 | import jinja2 8 | import re 9 | from pyLDAvis._server import serve 10 | from pyLDAvis.utils import get_id, write_ipynb_local_js, NumPyEncoder 11 | from pyLDAvis._prepare import PreparedData 12 | import pyLDAvis.urls as urls 13 | 14 | __all__ = ["prepared_data_to_html", "display", 15 | "show", "save_html", "save_json", 16 | "enable_notebook", "disable_notebook"] 17 | 18 | 19 | # Simple HTML template. This works in standalone web pages for single visualizations, 20 | # but will not work within the IPython notebook due to the presence of 21 | # requirejs 22 | SIMPLE_HTML = jinja2.Template(""" 23 | 24 | 25 | 26 | 27 |
28 | 33 | """) 34 | 35 | 36 | # RequireJS template. If requirejs and jquery are not defined, this will 37 | # result in an error. This is suitable for use within the IPython notebook. 38 | REQUIREJS_HTML = jinja2.Template(""" 39 | 40 | 41 | 42 |
43 | 60 | """) 61 | 62 | 63 | # General HTML template. This should work correctly whether or not requirejs 64 | # is defined, and whether it's embedded in a notebook or in a standalone 65 | # HTML page. 66 | GENERAL_HTML = jinja2.Template(""" 67 | 68 | 69 | 70 |
71 | 107 | """) 108 | 109 | TEMPLATE_DICT = {"simple": SIMPLE_HTML, 110 | "notebook": REQUIREJS_HTML, 111 | "general": GENERAL_HTML} 112 | 113 | 114 | def prepared_data_to_html(data, d3_url=None, ldavis_url=None, ldavis_css_url=None, 115 | template_type="general", visid=None, use_http=False): 116 | """Output HTML with embedded visualization 117 | 118 | Parameters 119 | ---------- 120 | data : PreparedData, created using :func:`prepare` 121 | The data for the visualization. 122 | d3_url : string (optional) 123 | The URL of the d3 library. If not specified, a standard web path 124 | will be used. 125 | ldavis_url : string (optional) 126 | The URL of the LDAvis library. If not specified, a standard web path 127 | will be used. 128 | template_type : string 129 | string specifying the type of HTML template to use. Options are: 130 | 131 | ``"simple"`` 132 | suitable for a simple html page with one visualization. Will 133 | fail if require.js is available on the page. 134 | ``"notebook"`` 135 | assumes require.js and jquery are available. 136 | ``"general"`` 137 | more complicated, but works both in and out of the 138 | notebook, whether or not require.js and jquery are available 139 | visid : string (optional) 140 | The html/css id of the visualization div, which must not contain spaces. 141 | If not specified, a random id will be generated. 142 | use_http : boolean (optional) 143 | If true, use http:// instead of https:// for d3_url and ldavis_url. 144 | 145 | Returns 146 | ------- 147 | vis_html : string 148 | the HTML visualization 149 | 150 | See Also 151 | -------- 152 | :func:`save_json`: save json representation of visualization to file 153 | :func:`save_html` : save html representation of a visualization to file 154 | :func:`show` : launch a local server and show a visualization in a browser 155 | :func:`display` : embed visualization within the IPython notebook 156 | :func:`enable_notebook` : automatically embed visualizations in IPython notebook 157 | """ 158 | template = TEMPLATE_DICT[template_type] 159 | 160 | d3_url = d3_url or urls.D3_URL 161 | ldavis_url = ldavis_url or urls.LDAVIS_URL 162 | ldavis_css_url = ldavis_css_url or urls.LDAVIS_CSS_URL 163 | 164 | if use_http: 165 | d3_url = d3_url.replace('https://', 'http://') 166 | ldavis_url = ldavis_url.replace('https://', 'http://') 167 | 168 | if visid is None: 169 | visid = 'ldavis_' + get_id(data) + str(int(random.random() * 1E10)) 170 | elif re.search(r'\s', visid): 171 | raise ValueError("visid must not contain spaces") 172 | 173 | return template.render(visid=json.dumps(visid), 174 | visid_raw=visid, 175 | d3_url=d3_url, 176 | ldavis_url=ldavis_url, 177 | vis_json=data.to_json(), 178 | ldavis_css_url=ldavis_css_url) 179 | 180 | 181 | def display(data, local=False, **kwargs): 182 | """Display visualization in IPython notebook via the HTML display hook 183 | 184 | Parameters 185 | ---------- 186 | data : PreparedData, created using :func:`prepare` 187 | The data for the visualization. 188 | local : boolean (optional, default=False) 189 | if True, then copy the d3 & mpld3 libraries to a location visible to 190 | the notebook server, and source them from there. See Notes below. 191 | **kwargs : 192 | additional keyword arguments are passed through to :func:`prepared_data_to_html`. 193 | 194 | Returns 195 | ------- 196 | vis_d3 : IPython.display.HTML object 197 | the IPython HTML rich display of the visualization. 
198 | 199 | Notes 200 | ----- 201 | Known issues: using ``local=True`` may not work correctly in certain cases: 202 | 203 | - In IPython < 2.0, ``local=True`` may fail if the current working 204 | directory is changed within the notebook (e.g. with the %cd command). 205 | - In IPython 2.0+, ``local=True`` may fail if a url prefix is added 206 | (e.g. by setting NotebookApp.base_url). 207 | 208 | See Also 209 | -------- 210 | :func:`show` : launch a local server and show a visualization in a browser 211 | :func:`enable_notebook` : automatically embed visualizations in IPython notebook 212 | """ 213 | # import here, in case users don't have requirements installed 214 | from IPython.display import HTML 215 | 216 | if local: 217 | if 'ldavis_url' in kwargs or 'd3_url' in kwargs: 218 | warnings.warn( 219 | "display: specified urls are ignored when local=True") 220 | kwargs['d3_url'], kwargs['ldavis_url'], kwargs['ldavis_css_url'] = write_ipynb_local_js() 221 | 222 | return HTML(prepared_data_to_html(data, **kwargs)) 223 | 224 | 225 | def show(data, ip='127.0.0.1', port=8888, n_retries=50, 226 | local=True, open_browser=True, http_server=None, **kwargs): 227 | """Starts a local webserver and opens the visualization in a browser. 228 | 229 | Parameters 230 | ---------- 231 | data : PreparedData, created using :func:`prepare` 232 | The data for the visualization. 233 | ip : string, default = '127.0.0.1' 234 | the ip address used for the local server 235 | port : int, default = 8888 236 | the port number to use for the local server. If already in use, 237 | a nearby open port will be found (see n_retries) 238 | n_retries : int, default = 50 239 | the maximum number of ports to try when locating an empty port. 240 | local : bool, default = True 241 | if True, use the local d3 & LDAvis javascript versions, within the 242 | js/ folder. If False, use the standard urls. 243 | open_browser : bool (optional) 244 | if True (default), then open a web browser to the given HTML 245 | http_server : class (optional) 246 | optionally specify an HTTPServer class to use for showing the 247 | visualization. The default is Python's basic HTTPServer. 248 | **kwargs : 249 | additional keyword arguments are passed through to :func:`prepared_data_to_html` 250 | 251 | See Also 252 | -------- 253 | :func:`display` : embed visualization within the IPython notebook 254 | :func:`enable_notebook` : automatically embed visualizations in IPython notebook 255 | """ 256 | files = None 257 | if local: 258 | kwargs['ldavis_url'] = urls.LDAVIS_URL 259 | kwargs['d3_url'] = urls.D3_URL 260 | kwargs['ldavis_css_url'] = urls.LDAVIS_CSS_URL 261 | files = {'/LDAvis.js': ["text/javascript", open(urls.LDAVIS_LOCAL, 'r').read()], 262 | '/LDAvis.css': ["text/css", open(urls.LDAVIS_CSS_URL, 'r').read()], 263 | '/d3.js': ["text/javascript", open(urls.D3_URL, 'r').read()]} 264 | html = prepared_data_to_html(data, **kwargs) 265 | serve(html, ip=ip, port=port, n_retries=n_retries, files=files, 266 | open_browser=open_browser, http_server=http_server) 267 | 268 | 269 | def enable_notebook(local=False, **kwargs): 270 | """Enable the automatic display of visualizations in the IPython Notebook. 271 | 272 | Parameters 273 | ---------- 274 | local : boolean (optional, default=False) 275 | if True, then copy the d3 & LDAvis libraries to a location visible to 276 | the notebook server, and source them from there. See Notes below. 
277 | **kwargs : 278 | all keyword parameters are passed through to :func:`prepared_data_to_html` 279 | 280 | Notes 281 | ----- 282 | Known issues: using ``local=True`` may not work correctly in certain cases: 283 | 284 | - In IPython < 2.0, ``local=True`` may fail if the current working 285 | directory is changed within the notebook (e.g. with the %cd command). 286 | - In IPython 2.0+, ``local=True`` may fail if a url prefix is added 287 | (e.g. by setting NotebookApp.base_url). 288 | 289 | See Also 290 | -------- 291 | :func:`disable_notebook` : undo the action of enable_notebook 292 | :func:`display` : embed visualization within the IPython notebook 293 | :func:`show` : launch a local server and show a visualization in a browser 294 | """ 295 | try: 296 | from IPython.core.getipython import get_ipython 297 | except ImportError: 298 | raise ImportError('This feature requires IPython 1.0+') 299 | 300 | if local: 301 | if 'ldavis_url' in kwargs or 'd3_url' in kwargs: 302 | warnings.warn("enable_notebook: specified urls are ignored when local=True") 303 | kwargs['d3_url'], kwargs['ldavis_url'], kwargs['ldavis_css_url'] = write_ipynb_local_js() 304 | 305 | ip = get_ipython() 306 | formatter = ip.display_formatter.formatters['text/html'] 307 | formatter.for_type(PreparedData, 308 | lambda data, kwds=kwargs: prepared_data_to_html(data, **kwds)) 309 | 310 | 311 | def disable_notebook(): 312 | """Disable the automatic display of visualizations in the IPython Notebook. 313 | 314 | See Also 315 | -------- 316 | :func:`enable_notebook` : automatically embed visualizations in IPython notebook 317 | """ 318 | try: 319 | from IPython.core.getipython import get_ipython 320 | except ImportError: 321 | raise ImportError('This feature requires IPython 1.0+') 322 | ip = get_ipython() 323 | formatter = ip.display_formatter.formatters['text/html'] 324 | formatter.type_printers.pop(PreparedData, None) 325 | 326 | 327 | def save_html(data, fileobj, **kwargs): 328 | """Save an embedded visualization to file. 329 | 330 | This will produce a self-contained HTML file. Internet access is still required 331 | for the D3 and LDAvis libraries. 332 | 333 | Parameters 334 | ---------- 335 | data : PreparedData, created using :func:`prepare` 336 | The data for the visualization. 337 | fileobj : filename or file object 338 | The filename or file-like object in which to write the HTML 339 | representation of the visualization. 340 | **kwargs : 341 | additional keyword arguments will be passed to :func:`prepared_data_to_html` 342 | 343 | See Also 344 | -------- 345 | :func:`save_json`: save json representation of a visualization to file 346 | :func:`prepared_data_to_html` : output html representation of the visualization 347 | :func:`fig_to_dict` : output dictionary representation of the visualization 348 | """ 349 | try: 350 | if isinstance(fileobj, basestring): 351 | fileobj = open(fileobj, 'w') 352 | except NameError: 353 | if isinstance(fileobj, str): 354 | fileobj = open(fileobj, 'w') 355 | if not hasattr(fileobj, 'write'): 356 | raise ValueError("fileobj should be a filename or a writable file") 357 | fileobj.write(prepared_data_to_html(data, **kwargs)) 358 | 359 | 360 | def save_json(data, fileobj): 361 | """Save the visualization's data a json file. 362 | 363 | Parameters 364 | ---------- 365 | data : PreparedData, created using :func:`prepare` 366 | The data for the visualization. 
367 | fileobj : filename or file object 368 | The filename or file-like object in which to write the HTML 369 | representation of the visualization. 370 | 371 | See Also 372 | -------- 373 | :func:`save_html` : save html representation of a visualization to file 374 | :func:`prepared_data_to_html` : output html representation of the visualization 375 | """ 376 | try: 377 | if isinstance(fileobj, basestring): 378 | fileobj = open(fileobj, 'w') 379 | except NameError: 380 | if isinstance(fileobj, str): 381 | fileobj = open(fileobj, 'w') 382 | if not hasattr(fileobj, 'write'): 383 | raise ValueError("fileobj should be a filename or a writable file") 384 | json.dump(data.to_dict(), fileobj, cls=NumPyEncoder) 385 | -------------------------------------------------------------------------------- /pyLDAvis/_prepare.py: -------------------------------------------------------------------------------- 1 | """ 2 | pyLDAvis Prepare 3 | =============== 4 | Main transformation functions for preparing LDAdata to the visualization's data structures 5 | """ 6 | import json 7 | import logging 8 | import numpy as np 9 | import pandas as pd 10 | from collections import namedtuple 11 | from joblib import Parallel, delayed, cpu_count 12 | from scipy.stats import entropy 13 | from scipy.spatial.distance import pdist, squareform 14 | from sklearn.manifold import MDS, TSNE 15 | 16 | from pyLDAvis.utils import NumPyEncoder 17 | 18 | 19 | def __num_dist_rows__(array, ndigits=2): 20 | return array.shape[0] - int((pd.DataFrame(array).sum(axis=1) < 0.999).sum()) 21 | 22 | 23 | class ValidationError(ValueError): 24 | pass 25 | 26 | 27 | def _input_check(topic_term_dists, doc_topic_dists, doc_lengths, vocab, term_frequency): 28 | ttds = topic_term_dists.shape 29 | dtds = doc_topic_dists.shape 30 | errors = [] 31 | 32 | def err(msg): 33 | errors.append(msg) 34 | 35 | if dtds[1] != ttds[0]: 36 | err_msg = ('Number of rows of topic_term_dists does not match number of columns of ' 37 | 'doc_topic_dists; both should be equal to the number of topics in the model.') 38 | err(err_msg) 39 | 40 | if len(doc_lengths) != dtds[0]: 41 | err_msg = ('Length of doc_lengths not equal to the number of rows in doc_topic_dists;' 42 | 'both should be equal to the number of documents in the data.') 43 | err(err_msg) 44 | 45 | W = len(vocab) 46 | if ttds[1] != W: 47 | err_msg = ('Number of terms in vocabulary does not match the number of columns of ' 48 | 'topic_term_dists (where each row of topic_term_dists is a probability ' 49 | 'distribution of terms for a given topic)') 50 | err(err_msg) 51 | if len(term_frequency) != W: 52 | err_msg = ('Length of term_frequency not equal to the number of terms in the ' 53 | 'number of terms in the vocabulary (len of vocab)') 54 | err(err_msg) 55 | 56 | if __num_dist_rows__(topic_term_dists) != ttds[0]: 57 | err('Not all rows (distributions) in topic_term_dists sum to 1.') 58 | 59 | if __num_dist_rows__(doc_topic_dists) != dtds[0]: 60 | err('Not all rows (distributions) in doc_topic_dists sum to 1.') 61 | 62 | if len(errors) > 0: 63 | return errors 64 | 65 | 66 | def _input_validate(*args): 67 | res = _input_check(*args) 68 | if res: 69 | raise ValidationError('\n' + '\n'.join([' * ' + s for s in res])) 70 | 71 | 72 | def _jensen_shannon(_P, _Q): 73 | _M = 0.5 * (_P + _Q) 74 | return 0.5 * (entropy(_P, _M) + entropy(_Q, _M)) 75 | 76 | 77 | def _pcoa(pair_dists, n_components=2): 78 | """Principal Coordinate Analysis, 79 | aka Classical Multidimensional Scaling 80 | """ 81 | # code referenced from 
skbio.stats.ordination.pcoa 82 | # https://github.com/biocore/scikit-bio/blob/0.5.0/skbio/stats/ordination/_principal_coordinate_analysis.py 83 | 84 | # pairwise distance matrix is assumed symmetric 85 | pair_dists = np.asarray(pair_dists, np.float64) 86 | 87 | # perform SVD on double centred distance matrix 88 | n = pair_dists.shape[0] 89 | H = np.eye(n) - np.ones((n, n)) / n 90 | B = - H.dot(pair_dists ** 2).dot(H) / 2 91 | eigvals, eigvecs = np.linalg.eig(B) 92 | 93 | # Take first n_components of eigenvalues and eigenvectors 94 | # sorted in decreasing order 95 | ix = eigvals.argsort()[::-1][:n_components] 96 | eigvals = eigvals[ix] 97 | eigvecs = eigvecs[:, ix] 98 | 99 | # replace any remaining negative eigenvalues and associated eigenvectors with zeroes 100 | # at least 1 eigenvalue must be zero 101 | eigvals[np.isclose(eigvals, 0)] = 0 102 | if np.any(eigvals < 0): 103 | ix_neg = eigvals < 0 104 | eigvals[ix_neg] = np.zeros(eigvals[ix_neg].shape) 105 | eigvecs[:, ix_neg] = np.zeros(eigvecs[:, ix_neg].shape) 106 | 107 | return np.sqrt(eigvals) * eigvecs 108 | 109 | 110 | def js_PCoA(distributions): 111 | """Dimension reduction via Jensen-Shannon Divergence & Principal Coordinate Analysis 112 | (aka Classical Multidimensional Scaling) 113 | 114 | Parameters 115 | ---------- 116 | distributions : array-like, shape (`n_dists`, `k`) 117 | Matrix of distributions probabilities. 118 | 119 | Returns 120 | ------- 121 | pcoa : array, shape (`n_dists`, 2) 122 | """ 123 | dist_matrix = squareform(pdist(distributions, metric=_jensen_shannon)) 124 | return _pcoa(dist_matrix) 125 | 126 | 127 | def js_MMDS(distributions, **kwargs): 128 | """Dimension reduction via Jensen-Shannon Divergence & Metric Multidimensional Scaling 129 | 130 | Parameters 131 | ---------- 132 | distributions : array-like, shape (`n_dists`, `k`) 133 | Matrix of distributions probabilities. 134 | 135 | **kwargs : Keyword argument to be passed to `sklearn.manifold.MDS()` 136 | 137 | Returns 138 | ------- 139 | mmds : array, shape (`n_dists`, 2) 140 | """ 141 | dist_matrix = squareform(pdist(distributions, metric=_jensen_shannon)) 142 | model = MDS(n_components=2, random_state=0, dissimilarity='precomputed', **kwargs) 143 | return model.fit_transform(dist_matrix) 144 | 145 | 146 | def js_TSNE(distributions, **kwargs): 147 | """Dimension reduction via Jensen-Shannon Divergence & t-distributed Stochastic Neighbor Embedding 148 | 149 | Parameters 150 | ---------- 151 | distributions : array-like, shape (`n_dists`, `k`) 152 | Matrix of distributions probabilities. 
153 | 154 | **kwargs : Keyword argument to be passed to `sklearn.manifold.TSNE()` 155 | 156 | Returns 157 | ------- 158 | tsne : array, shape (`n_dists`, 2) 159 | """ 160 | dist_matrix = squareform(pdist(distributions, metric=_jensen_shannon)) 161 | model = TSNE(n_components=2, random_state=0, metric='precomputed', init='random', 162 | perplexity=min(len(dist_matrix) - 1, 30), **kwargs) 163 | return model.fit_transform(dist_matrix) 164 | 165 | 166 | def _df_with_names(data, index_name, columns_name): 167 | if type(data) == pd.DataFrame: 168 | # we want our index to be numbered 169 | df = pd.DataFrame(data.values) 170 | else: 171 | df = pd.DataFrame(data) 172 | df.index.name = index_name 173 | df.columns.name = columns_name 174 | return df 175 | 176 | 177 | def _series_with_name(data, name): 178 | if type(data) == pd.Series: 179 | data.name = name 180 | # ensures a numeric index 181 | return data.reset_index()[name] 182 | else: 183 | return pd.Series(data, name=name) 184 | 185 | 186 | def _topic_coordinates(mds, topic_term_dists, topic_proportion, start_index=1): 187 | K = topic_term_dists.shape[0] 188 | mds_res = mds(topic_term_dists) 189 | assert mds_res.shape == (K, 2) 190 | mds_df = pd.DataFrame({'x': mds_res[:, 0], 'y': mds_res[:, 1], 191 | 'topics': range(start_index, K + start_index), 192 | 'cluster': 1, 'Freq': topic_proportion * 100}) 193 | # note: cluster (should?) be deprecated soon. See: https://github.com/cpsievert/LDAvis/issues/26 194 | return mds_df 195 | 196 | 197 | def _chunks(lambda_seq, n): 198 | """ Yield successive n-sized chunks from lambda_seq. 199 | """ 200 | for i in range(0, len(lambda_seq), n): 201 | yield lambda_seq[i:i + n] 202 | 203 | 204 | def _job_chunks(lambda_seq, n_jobs): 205 | n_chunks = n_jobs 206 | if n_jobs < 0: 207 | # so, have n chunks if we are using all n cores/cpus 208 | n_chunks = cpu_count() + 1 - n_jobs 209 | 210 | return _chunks(lambda_seq, n_chunks) 211 | 212 | 213 | def _find_relevance(log_ttd, log_lift, R, lambda_): 214 | relevance = lambda_ * log_ttd + (1 - lambda_) * log_lift 215 | return relevance.T.apply(lambda topic: topic.nlargest(R).index) 216 | 217 | 218 | def _find_relevance_chunks(log_ttd, log_lift, R, lambda_seq): 219 | return pd.concat([_find_relevance(log_ttd, log_lift, R, seq) for seq in lambda_seq]) 220 | 221 | 222 | def _topic_info(topic_term_dists, topic_proportion, term_frequency, term_topic_freq, 223 | vocab, lambda_step, R, n_jobs, start_index=1): 224 | # marginal distribution over terms (width of blue bars) 225 | term_proportion = term_frequency / term_frequency.sum() 226 | 227 | # compute the distinctiveness and saliency of the terms: 228 | # this determines the R terms that are displayed when no topic is selected. 229 | # TODO(msusol): Make flake8 test pass here with 'unused' variables. 
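    # What the next few lines compute (following the saliency/distinctiveness
    # definitions used by LDAvis, after Chuang et al. 2012): for each term w,
    #   distinctiveness(w) = sum_k P(topic k | w) * log(P(topic k | w) / P(topic k))
    #   saliency(w)        = P(w) * distinctiveness(w)
    # topic_given_term holds P(topic | term), log_1 holds the log-ratio factor, and
    # the pd.eval() calls are just a vectorised way of writing these formulas.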
230 | tt_sum = topic_term_dists.sum() 231 | topic_given_term = pd.eval("topic_term_dists / tt_sum") 232 | log_1 = np.log(pd.eval("topic_given_term.T / topic_proportion")) 233 | kernel = pd.eval("topic_given_term * log_1.T") 234 | distinctiveness = kernel.sum() 235 | saliency = term_proportion * distinctiveness 236 | # Order the terms for the "default" view by decreasing saliency: 237 | default_term_info = pd.DataFrame({ 238 | 'saliency': saliency, 239 | 'Term': vocab, 240 | 'Freq': term_frequency, 241 | 'Total': term_frequency, 242 | 'Category': 'Default'}) 243 | default_term_info = default_term_info.sort_values( 244 | by='saliency', ascending=False).head(R).drop('saliency', axis=1) 245 | # Rounding Freq and Total to integer values to match LDAvis code: 246 | default_term_info['Freq'] = np.floor(default_term_info['Freq']) 247 | default_term_info['Total'] = np.floor(default_term_info['Total']) 248 | ranks = np.arange(R, 0, -1) 249 | default_term_info['logprob'] = default_term_info['loglift'] = ranks 250 | default_term_info = default_term_info.reindex(columns=[ 251 | "Term", "Freq", "Total", "Category", "logprob", "loglift" 252 | ]) 253 | 254 | # compute relevance and top terms for each topic 255 | log_lift = np.log(pd.eval("topic_term_dists / term_proportion")).astype("float64") 256 | log_ttd = np.log(pd.eval("topic_term_dists")).astype("float64") 257 | lambda_seq = np.arange(0, 1 + lambda_step, lambda_step) 258 | 259 | def topic_top_term_df(tup): 260 | new_topic_id, (original_topic_id, topic_terms) = tup 261 | term_ix = topic_terms.unique() 262 | df = pd.DataFrame({'Term': vocab[term_ix], 263 | 'Freq': term_topic_freq.loc[original_topic_id, term_ix], 264 | 'Total': term_frequency[term_ix], 265 | 'Category': 'Topic%d' % new_topic_id, 266 | 'logprob': log_ttd.loc[original_topic_id, term_ix].round(4), 267 | 'loglift': log_lift.loc[original_topic_id, term_ix].round(4), 268 | }) 269 | return df.reindex(columns=[ 270 | "Term", "Freq", "Total", "Category", "logprob", "loglift" 271 | ]) 272 | 273 | top_terms = pd.concat(Parallel(n_jobs=n_jobs) 274 | (delayed(_find_relevance_chunks)(log_ttd, log_lift, R, ls) 275 | for ls in _job_chunks(lambda_seq, n_jobs))) 276 | topic_dfs = map(topic_top_term_df, enumerate(top_terms.T.iterrows(), start_index)) 277 | return pd.concat([default_term_info] + list(topic_dfs)) 278 | 279 | 280 | def _token_table(topic_info, term_topic_freq, vocab, term_frequency, start_index=1): 281 | # last, to compute the areas of the circles when a term is highlighted 282 | # we must gather all unique terms that could show up (for every combination 283 | # of topic and value of lambda) and compute its distribution over topics. 
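    # The resulting table has one row per (topic, term) pair with columns
    # 'Topic', 'Freq' and 'Term'; after the normalisation below, 'Freq' is the
    # share of a term's total frequency that falls within each topic, which the
    # front end uses to resize the topic circles when a term is selected.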
284 | 285 | # term-topic frequency table of unique terms across all topics and all values of lambda 286 | term_ix = topic_info.index.unique() 287 | term_ix = np.sort(term_ix) 288 | 289 | top_topic_terms_freq = term_topic_freq[term_ix] 290 | # use the new ordering for the topics 291 | K = len(term_topic_freq) 292 | top_topic_terms_freq.index = range(start_index, K + start_index) 293 | top_topic_terms_freq.index.name = 'Topic' 294 | 295 | # we filter to Freq >= 0.5 to avoid sending too much data to the browser 296 | token_table = pd.DataFrame({'Freq': top_topic_terms_freq.unstack()})\ 297 | .reset_index().set_index('term').query('Freq >= 0.5') 298 | 299 | token_table['Freq'] = token_table['Freq'].round() 300 | token_table['Term'] = vocab[token_table.index.values].values 301 | # Normalize token frequencies: 302 | token_table['Freq'] = token_table.Freq / term_frequency[token_table.index] 303 | return token_table.sort_values(by=['Term', 'Topic']) 304 | 305 | 306 | def prepare(topic_term_dists, doc_topic_dists, doc_lengths, vocab, term_frequency, 307 | R=30, lambda_step=0.01, mds=js_PCoA, n_jobs=-1, 308 | plot_opts=None, sort_topics=True, start_index=1): 309 | """Transforms the topic model distributions and related corpus data into 310 | the data structures needed for the visualization. 311 | 312 | Parameters 313 | ---------- 314 | topic_term_dists : array-like, shape (`n_topics`, `n_terms`) 315 | Matrix of topic-term probabilities. Where `n_terms` is `len(vocab)`. 316 | doc_topic_dists : array-like, shape (`n_docs`, `n_topics`) 317 | Matrix of document-topic probabilities. 318 | doc_lengths : array-like, shape `n_docs` 319 | The length of each document, i.e. the number of words in each document. 320 | The order of the numbers should be consistent with the ordering of the 321 | docs in `doc_topic_dists`. 322 | vocab : array-like, shape `n_terms` 323 | List of all the words in the corpus used to train the model. 324 | term_frequency : array-like, shape `n_terms` 325 | The count of each particular term over the entire corpus. The ordering 326 | of these counts should correspond with `vocab` and `topic_term_dists`. 327 | R : int 328 | The number of terms to display in the barcharts of the visualization. 329 | Default is 30. Recommended to be roughly between 10 and 50. 330 | lambda_step : float, between 0 and 1 331 | Determines the interstep distance in the grid of lambda values over 332 | which to iterate when computing relevance. 333 | Default is 0.01. Recommended to be between 0.01 and 0.1. 334 | mds : function or a string representation of function 335 | A function that takes `topic_term_dists` as an input and outputs a 336 | `n_topics` by `2` distance matrix. The output approximates the distance 337 | between topics. See :func:`js_PCoA` for details on the default function. 338 | A string representation currently accepts `pcoa` (or upper case variant), 339 | `mmds` (or upper case variant) and `tsne` (or upper case variant), 340 | if `sklearn` package is installed for the latter two. 341 | n_jobs : int 342 | The number of cores to be used to do the computations. The regular 343 | joblib conventions are followed so `-1`, which is the default, will 344 | use all cores. 345 | plot_opts : dict, with keys 'xlab' and `ylab` 346 | Dictionary of plotting options, right now only used for the axis labels. 347 | sort_topics : sort topics by topic proportion (percentage of tokens covered). Set to false to 348 | to keep original topic order. 349 | start_index: how to number topics for prepared data. 
Defaults to one-based indexing. 350 | Set to 0 for zero-based indexing. 351 | 352 | Returns 353 | ------- 354 | prepared_data : PreparedData 355 | A named tuple containing all the data structures required to create 356 | the visualization. To be passed on to functions like :func:`display`. 357 | This named tuple can be represented as json or a python dictionary. 358 | There is a helper function 'sorted_terms' that can be used to get 359 | the terms of a topic using lambda to rank their relevance. 360 | 361 | 362 | Notes 363 | ----- 364 | This implements the method of `Sievert, C. and Shirley, K. (2014): 365 | LDAvis: A Method for Visualizing and Interpreting Topics, ACL Workshop on 366 | Interactive Language Learning, Visualization, and Interfaces.` 367 | 368 | http://nlp.stanford.edu/events/illvi2014/papers/sievert-illvi2014.pdf 369 | 370 | See Also 371 | -------- 372 | :func:`save_json`: save json representation of a figure to file 373 | :func:`save_html` : save html representation of a figure to file 374 | :func:`show` : launch a local server and show a figure in a browser 375 | :func:`display` : embed figure within the IPython notebook 376 | :func:`enable_notebook` : automatically embed visualizations in IPython notebook 377 | """ 378 | if plot_opts is None: 379 | plot_opts = {'xlab': 'PC1', 'ylab': 'PC2'} 380 | 381 | # parse mds 382 | if isinstance(mds, str): 383 | mds = mds.lower() 384 | if mds == 'pcoa': 385 | mds = js_PCoA 386 | elif mds in ('mmds', 'tsne'): 387 | mds_opts = {'mmds': js_MMDS, 'tsne': js_TSNE} 388 | mds = mds_opts[mds] 389 | else: 390 | logging.warning('Unknown mds `%s`, switch to PCoA' % mds) 391 | mds = js_PCoA 392 | 393 | # Conceptually, the items in `topic_term_dists` end up as individual rows in the 394 | # DataFrame, but we can speed up ingestion by treating them as columns and 395 | # transposing at the end. (This is especially true when the number of terms far 396 | # exceeds the number of topics.) 397 | topic_term_dist_cols = [ 398 | pd.Series(topic_term_dist, dtype="float64") 399 | for topic_term_dist in topic_term_dists 400 | ] 401 | topic_term_dists = pd.concat(topic_term_dist_cols, axis=1).T 402 | 403 | topic_term_dists = _df_with_names(topic_term_dists, 'topic', 'term') 404 | doc_topic_dists = _df_with_names(doc_topic_dists, 'doc', 'topic') 405 | term_frequency = _series_with_name(term_frequency, 'term_frequency') 406 | doc_lengths = _series_with_name(doc_lengths, 'doc_length') 407 | vocab = _series_with_name(vocab, 'vocab') 408 | _input_validate(topic_term_dists, doc_topic_dists, doc_lengths, vocab, term_frequency) 409 | R = min(R, len(vocab)) 410 | 411 | topic_freq = doc_topic_dists.mul(doc_lengths, axis="index").sum() 412 | # topic_freq = np.dot(doc_topic_dists.T, doc_lengths) 413 | if (sort_topics): 414 | topic_proportion = (topic_freq / topic_freq.sum()).sort_values(ascending=False) 415 | else: 416 | topic_proportion = (topic_freq / topic_freq.sum()) 417 | 418 | topic_order = topic_proportion.index 419 | # reorder all data based on new ordering of topics 420 | topic_freq = topic_freq[topic_order] 421 | topic_term_dists = topic_term_dists.iloc[topic_order] 422 | # Unused: doc_topic_dists = doc_topic_dists[topic_order] 423 | 424 | # token counts for each term-topic combination (widths of red bars) 425 | term_topic_freq = (topic_term_dists.T * topic_freq).T 426 | # Quick fix for red bar width bug. 
We calculate the 427 | # term frequencies internally, using the topic term distributions and the 428 | # topic frequencies, rather than using the user-supplied term frequencies. 429 | # For a detailed discussion, see: https://github.com/cpsievert/LDAvis/pull/41 430 | term_frequency = np.sum(term_topic_freq, axis=0) 431 | 432 | topic_info = _topic_info(topic_term_dists, topic_proportion, 433 | term_frequency, term_topic_freq, vocab, lambda_step, R, 434 | n_jobs, start_index) 435 | token_table = _token_table(topic_info, term_topic_freq, vocab, term_frequency, start_index) 436 | topic_coordinates = _topic_coordinates(mds, topic_term_dists, topic_proportion, start_index) 437 | client_topic_order = [x + start_index for x in topic_order] 438 | 439 | return PreparedData(topic_coordinates, topic_info, 440 | token_table, R, lambda_step, plot_opts, client_topic_order) 441 | 442 | 443 | class PreparedData(namedtuple('PreparedData', ['topic_coordinates', 'topic_info', 'token_table', 444 | 'R', 'lambda_step', 'plot_opts', 'topic_order'])): 445 | 446 | def sorted_terms(self, topic=1, _lambda=1): 447 | """Returns a dataframe using _lambda to calculate term relevance of a given topic.""" 448 | tdf = pd.DataFrame(self.topic_info[self.topic_info.Category == 'Topic' + str(topic)]) 449 | if _lambda < 0 or _lambda > 1: 450 | _lambda = 1 451 | stdf = tdf.assign(relevance=_lambda * tdf['logprob'] + (1 - _lambda) * tdf['loglift']) 452 | return stdf.sort_values('relevance', ascending=False) 453 | 454 | def to_dict(self): 455 | return {'mdsDat': self.topic_coordinates.to_dict(orient='list'), 456 | 'tinfo': self.topic_info.to_dict(orient='list'), 457 | 'token.table': self.token_table.to_dict(orient='list'), 458 | 'R': self.R, 459 | 'lambda.step': self.lambda_step, 460 | 'plot.opts': self.plot_opts, 461 | 'topic.order': self.topic_order} 462 | 463 | def to_json(self): 464 | return json.dumps(self.to_dict(), cls=NumPyEncoder) 465 | -------------------------------------------------------------------------------- /pyLDAvis/_server.py: -------------------------------------------------------------------------------- 1 | # this file is largely based on https://github.com/jakevdp/mpld3/blob/master/mpld3/_server.py 2 | # Copyright (c) 2013, Jake Vanderplas 3 | """ 4 | A Simple server used to serve LDAvis visualizations 5 | """ 6 | import sys 7 | import threading 8 | import webbrowser 9 | import socket 10 | import itertools 11 | import random 12 | from http import server 13 | 14 | IPYTHON_WARNING = """ 15 | Note: if you're in the IPython notebook, pyLDAvis.show() is not the best command 16 | to use. Consider using pyLDAvis.display(), or pyLDAvis.enable_notebook(). 17 | See more information at http://pyLDAvis.github.io/quickstart.html . 
18 | 19 | You must interrupt the kernel to end this command 20 | """ 21 | 22 | 23 | def generate_handler(html, files=None): 24 | if files is None: 25 | files = {} 26 | 27 | class MyHandler(server.BaseHTTPRequestHandler): 28 | def do_GET(self): 29 | """Respond to a GET request.""" 30 | if self.path == '/': 31 | self.send_response(200) 32 | self.send_header("Content-type", "text/html") 33 | self.end_headers() 34 | self.wfile.write("" 35 | "LDAvis" 36 | "\n".encode()) 37 | self.wfile.write(html.encode()) 38 | self.wfile.write("".encode()) 39 | elif self.path in files: 40 | content_type, content = files[self.path] 41 | self.send_response(200) 42 | self.send_header("Content-type", content_type) 43 | self.end_headers() 44 | self.wfile.write(content.encode()) 45 | else: 46 | self.send_error(404) 47 | 48 | return MyHandler 49 | 50 | 51 | def find_open_port(ip, port, n=50): 52 | """Find an open port near the specified port""" 53 | ports = itertools.chain((port + i for i in range(n)), 54 | (port + random.randint(-2 * n, 2 * n))) 55 | 56 | for port in ports: 57 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 58 | result = s.connect_ex((ip, port)) 59 | s.close() 60 | if result != 0: 61 | return port 62 | raise ValueError("no open ports found") 63 | 64 | 65 | def serve(html, ip='127.0.0.1', port=8888, n_retries=50, files=None, 66 | ipython_warning=False, open_browser=True, http_server=None): 67 | """Start a server serving the given HTML, and (optionally) open a 68 | browser 69 | 70 | Parameters 71 | ---------- 72 | html : string 73 | HTML to serve 74 | ip : string (default = '127.0.0.1') 75 | ip address at which the HTML will be served. 76 | port : int (default = 8888) 77 | the port at which to serve the HTML 78 | n_retries : int (default = 50) 79 | the number of nearby ports to search if the specified port is in use. 80 | files : dictionary (optional) 81 | dictionary of extra content to serve 82 | ipython_warning : bool (optional) 83 | if True (default), then print a warning if this is used within IPython 84 | open_browser : bool (optional) 85 | if True (default), then open a web browser to the given HTML 86 | http_server : class (optional) 87 | optionally specify an HTTPServer class to use for showing the 88 | figure. The default is Python's basic HTTPServer. 
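    Examples
    --------
    A minimal sketch, assuming ``prepared`` is a ``PreparedData`` object built
    with :func:`pyLDAvis.prepare`::

        import pyLDAvis

        html = pyLDAvis.prepared_data_to_html(prepared)
        serve(html, port=8889, open_browser=False)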
89 | """ 90 | port = find_open_port(ip, port, n_retries) 91 | Handler = generate_handler(html, files) 92 | 93 | if http_server is None: 94 | srvr = server.HTTPServer((ip, port), Handler) 95 | else: 96 | srvr = http_server((ip, port), Handler) 97 | 98 | if ipython_warning: 99 | print(IPYTHON_WARNING) 100 | 101 | # Start the server 102 | print("Serving to http://{0}:{1}/ [Ctrl-C to exit]".format(ip, port)) 103 | sys.stdout.flush() 104 | 105 | if open_browser: 106 | # Use a thread to open a web browser pointing to the server 107 | b = lambda: webbrowser.open('http://{0}:{1}'.format(ip, port)) 108 | threading.Thread(target=b).start() 109 | 110 | try: 111 | srvr.serve_forever() 112 | except (KeyboardInterrupt, SystemExit): 113 | print("\nstopping Server...") 114 | 115 | srvr.server_close() 116 | -------------------------------------------------------------------------------- /pyLDAvis/gensim_models.py: -------------------------------------------------------------------------------- 1 | """ 2 | pyLDAvis Gensim 3 | =============== 4 | Helper functions to visualize LDA models trained by Gensim 5 | """ 6 | 7 | import funcy as fp 8 | import numpy as np 9 | from scipy.sparse import issparse 10 | import pyLDAvis._prepare 11 | 12 | 13 | def _extract_data(topic_model, corpus, dictionary, doc_topic_dists=None): 14 | import gensim 15 | 16 | if not gensim.matutils.ismatrix(corpus): 17 | corpus_csc = gensim.matutils.corpus2csc(corpus, num_terms=len(dictionary)) 18 | else: 19 | corpus_csc = corpus 20 | # Need corpus to be a streaming gensim list corpus for len and inference functions below: 21 | corpus = gensim.matutils.Sparse2Corpus(corpus_csc) 22 | 23 | vocab = list(dictionary.token2id.keys()) 24 | # TODO: add the hyperparam to smooth it out? no beta in online LDA impl.. hmm.. 25 | # for now, I'll just make sure we don't ever get zeros... 26 | beta = 0.01 27 | fnames_argsort = np.asarray(list(dictionary.token2id.values()), dtype=np.int_) 28 | term_freqs = corpus_csc.sum(axis=1).A.ravel()[fnames_argsort] 29 | term_freqs[term_freqs == 0] = beta 30 | doc_lengths = corpus_csc.sum(axis=0).A.ravel() 31 | 32 | assert term_freqs.shape[0] == len(dictionary),\ 33 | 'Term frequencies and dictionary have different shape {} != {}'.format( 34 | term_freqs.shape[0], len(dictionary)) 35 | assert doc_lengths.shape[0] == len(corpus),\ 36 | 'Document lengths and corpus have different sizes {} != {}'.format( 37 | doc_lengths.shape[0], len(corpus)) 38 | 39 | if hasattr(topic_model, 'lda_alpha'): 40 | num_topics = len(topic_model.lda_alpha) 41 | else: 42 | num_topics = topic_model.num_topics 43 | 44 | if doc_topic_dists is None: 45 | # If its an HDP model. 
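        # (`lda_beta` is used here as the marker for an HDP-style model; for those,
        # inference() is assumed to return gamma directly, whereas LdaModel.inference()
        # returns a (gamma, sufficient-stats) tuple, which is why only gamma is
        # unpacked in the else branch.)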
46 | if hasattr(topic_model, 'lda_beta'): 47 | gamma = topic_model.inference(corpus) 48 | else: 49 | gamma, _ = topic_model.inference(corpus) 50 | doc_topic_dists = gamma / gamma.sum(axis=1)[:, None] 51 | else: 52 | if isinstance(doc_topic_dists, list): 53 | doc_topic_dists = gensim.matutils.corpus2dense(doc_topic_dists, num_topics).T 54 | elif issparse(doc_topic_dists): 55 | doc_topic_dists = doc_topic_dists.T.todense() 56 | doc_topic_dists = doc_topic_dists / doc_topic_dists.sum(axis=1) 57 | 58 | assert doc_topic_dists.shape[1] == num_topics,\ 59 | 'Document topics and number of topics do not match {} != {}'.format( 60 | doc_topic_dists.shape[1], num_topics) 61 | 62 | # get the topic-term distribution straight from gensim without 63 | # iterating over tuples 64 | if hasattr(topic_model, 'lda_beta'): 65 | topic = topic_model.lda_beta 66 | else: 67 | topic = topic_model.state.get_lambda() 68 | topic = topic / topic.sum(axis=1)[:, None] 69 | topic_term_dists = topic[:, fnames_argsort] 70 | 71 | assert topic_term_dists.shape[0] == doc_topic_dists.shape[1] 72 | 73 | return {'topic_term_dists': topic_term_dists, 'doc_topic_dists': doc_topic_dists, 74 | 'doc_lengths': doc_lengths, 'vocab': vocab, 'term_frequency': term_freqs} 75 | 76 | 77 | def prepare(topic_model, corpus, dictionary, doc_topic_dist=None, **kwargs): 78 | """Transforms the Gensim TopicModel and related corpus and dictionary into 79 | the data structures needed for the visualization. 80 | 81 | Parameters 82 | ---------- 83 | topic_model : gensim.models.ldamodel.LdaModel 84 | An already trained Gensim LdaModel. The other gensim model types are 85 | not supported (PRs welcome). 86 | 87 | corpus : array-like list of bag of word docs in tuple form or scipy CSC matrix 88 | The corpus in bag of word form, the same docs used to train the model. 89 | The corpus is transformed into a csc matrix internally, if you intend to 90 | call prepare multiple times it is a good idea to first call 91 | `gensim.matutils.corpus2csc(corpus)` and pass in the csc matrix instead. 92 | 93 | For example: [(50, 3), (63, 5), ....] 94 | 95 | dictionary: gensim.corpora.Dictionary 96 | The dictionary object used to create the corpus. Needed to extract the 97 | actual terms (not ids). 98 | 99 | doc_topic_dist (optional): Document topic distribution from LDA (default=None) 100 | The document topic distribution that is eventually visualised, if you will 101 | be calling `prepare` multiple times it's a good idea to explicitly pass in 102 | `doc_topic_dist` as inferring this for large corpora can be quite 103 | expensive. 104 | 105 | **kwargs : 106 | additional keyword arguments are passed through to :func:`pyldavis.prepare`. 107 | 108 | Returns 109 | ------- 110 | prepared_data : PreparedData 111 | the data structures used in the visualization 112 | 113 | Example 114 | -------- 115 | For example usage please see this notebook: 116 | http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/Gensim%20Newsgroup.ipynb 117 | 118 | See 119 | ------ 120 | See `pyLDAvis.prepare` for **kwargs. 
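    A minimal end-to-end sketch (``texts`` here stands in for your own list of
    tokenised documents)::

        from gensim.corpora import Dictionary
        from gensim.models import LdaModel
        import pyLDAvis
        import pyLDAvis.gensim_models

        dictionary = Dictionary(texts)
        corpus = [dictionary.doc2bow(doc) for doc in texts]
        lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=10)

        vis = pyLDAvis.gensim_models.prepare(lda, corpus, dictionary)
        pyLDAvis.save_html(vis, 'lda_visualization.html')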
121 | """ 122 | opts = fp.merge(_extract_data(topic_model, corpus, dictionary, doc_topic_dist), kwargs) 123 | return pyLDAvis.prepare(**opts) 124 | -------------------------------------------------------------------------------- /pyLDAvis/graphlab.py: -------------------------------------------------------------------------------- 1 | """ 2 | pyLDAvis GraphLab 3 | =============== 4 | Helper functions to visualize GraphLab Create's TopicModel (an implementation of LDA) 5 | """ 6 | 7 | import funcy as fp 8 | import numpy as np 9 | import pandas as pd 10 | import graphlab as gl 11 | import pyLDAvis 12 | 13 | 14 | def _topics_as_df(topic_model): 15 | tdf = topic_model['topics'].to_dataframe() 16 | return pd.DataFrame(np.vstack(tdf['topic_probabilities'].values), index=tdf['vocabulary']) 17 | 18 | 19 | def _sum_sarray_dicts(sarray): 20 | counts_sf = gl.SFrame({ 21 | 'count_dicts': sarray}).stack('count_dicts').groupby( 22 | key_columns='X1', 23 | operations={'count': gl.aggregate.SUM('X2')}) 24 | return counts_sf.unstack(column=['X1', 'count'])[0].values()[0] 25 | 26 | 27 | def _extract_doc_data(docs): 28 | doc_lengths = list(docs.apply(lambda d: np.array(d.values()).sum())) 29 | term_freqs_dict = _sum_sarray_dicts(docs) 30 | 31 | vocab = term_freqs_dict.keys() 32 | term_freqs = term_freqs_dict.values() 33 | 34 | return {'doc_lengths': doc_lengths, 'vocab': vocab, 'term_frequency': term_freqs} 35 | 36 | 37 | def _extract_model_data(topic_model, docs, vocab): 38 | doc_topic_dists = np.vstack(topic_model.predict(docs, output_type='probabilities')) 39 | 40 | topics = _topics_as_df(topic_model) 41 | topic_term_dists = topics.T[vocab].values 42 | 43 | return {'topic_term_dists': topic_term_dists, 'doc_topic_dists': doc_topic_dists} 44 | 45 | 46 | def _extract_data(topic_model, docs): 47 | doc_data = _extract_doc_data(docs) 48 | model_data = _extract_model_data(topic_model, docs, doc_data['vocab']) 49 | return fp.merge(doc_data, model_data) 50 | 51 | 52 | def prepare(topic_model, docs, **kargs): 53 | """Transforms the GraphLab TopicModel and related corpus data into 54 | the data structures needed for the visualization. 55 | 56 | Parameters 57 | ---------- 58 | topic_model : graphlab.toolkits.topic_model.topic_model.TopicModel 59 | An already trained GraphLab topic model. 60 | docs : SArray of dicts 61 | The corpus in bag of word form, the same docs used to train the model. 62 | **kwargs : 63 | additional keyword arguments are passed through to :func:`pyldavis.prepare`. 
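A minimal usage sketch; ``topic_model`` and ``docs`` are placeholders for an already trained GraphLab Create topic model and the SArray of bag-of-words dicts it was trained on (both are assumed to exist, they are not created by this snippet)::

    import pyLDAvis
    import pyLDAvis.graphlab

    # topic_model and docs are assumed to exist already (see the parameters above).
    vis_data = pyLDAvis.graphlab.prepare(topic_model, docs)
    pyLDAvis.display(vis_data)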
64 | 65 | Returns 66 | ------- 67 | prepared_data : PreparedData 68 | the data structures used in the visualization 69 | 70 | Example 71 | -------- 72 | For example usage please see this notebook: 73 | http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/GraphLab.ipynb 74 | """ 75 | opts = fp.merge(_extract_data(topic_model, docs), kargs) 76 | return pyLDAvis.prepare(**opts) 77 | -------------------------------------------------------------------------------- /pyLDAvis/js/ldavis.css: -------------------------------------------------------------------------------- 1 | /* Taken from https://github.com/cpsievert/LDAvis */ 2 | /* Copyright 2013, AT&T Intellectual Property */ 3 | /* MIT Licence */ 4 | 5 | path { 6 | fill: none; 7 | stroke: none; 8 | } 9 | 10 | .xaxis .tick.major { 11 | fill: black; 12 | stroke: black; 13 | stroke-width: 0.1; 14 | opacity: 0.7; 15 | } 16 | 17 | .slideraxis { 18 | fill: black; 19 | stroke: black; 20 | stroke-width: 0.4; 21 | opacity: 1; 22 | } 23 | 24 | text { 25 | font-family: sans-serif; 26 | font-size: 11px; 27 | } 28 | 29 | /* IPython Notebook CSS to allow visualization to fit */ 30 | /* I'm open to a better way of accomplishing this goal... */ 31 | .container { width:1350px !important; } 32 | /* This is for nbviewer's benefit since the above wasn't enough... */ 33 | .output_area { width:1450px !important; } 34 | -------------------------------------------------------------------------------- /pyLDAvis/js/ldavis.js: -------------------------------------------------------------------------------- 1 | /* Original code taken from https://github.com/cpsievert/LDAvis */ 2 | /* Copyright 2013, AT&T Intellectual Property */ 3 | /* MIT Licence */ 4 | 5 | 'use strict'; 6 | 7 | var LDAvis = function(to_select, data_or_file_name, color1, color2) { 8 | 9 | // This section sets up the logic for event handling 10 | var current_clicked = { 11 | what: "nothing", 12 | element: undefined 13 | }, 14 | current_hover = { 15 | what: "nothing", 16 | element: undefined 17 | }, 18 | old_winning_state = { 19 | what: "nothing", 20 | element: undefined 21 | }, 22 | vis_state = { 23 | lambda: 1, 24 | topic: 0, 25 | term: "" 26 | }; 27 | 28 | // Set up a few 'global' variables to hold the data: 29 | var K, // number of topics 30 | R, // number of terms to display in bar chart 31 | mdsData, // (x,y) locations and topic proportions 32 | mdsData3, // topic proportions for all terms in the viz 33 | lamData, // all terms that are among the top-R most relevant for all topics, lambda values 34 | lambda = { 35 | old: 1, 36 | current: 1 37 | }, 38 | color1 = typeof color1 !=='undefined' ? color1: "#1f77b4", // baseline color for default topic circles and overall term frequencies 39 | color2 = typeof color2 !=='undefined' ? 
color2: "#d62728"; // 'highlight' color for selected topics and term-topic frequencies 40 | 41 | // Set the duration of each half of the transition: 42 | var duration = 750; 43 | 44 | // Set global margins used for everything 45 | var margin = { 46 | top: 30, 47 | right: 30, 48 | bottom: 70, 49 | left: 30 50 | }, 51 | 52 | mdswidth = 530, 53 | mdsheight = 530, 54 | barwidth = 530, 55 | barheight = 530, 56 | termwidth = 90, // width to add between two panels to display terms 57 | mdsarea = mdsheight * mdswidth; 58 | // controls how big the maximum circle can be 59 | // doesn't depend on data, only on mds width and height: 60 | var rMax = 60; 61 | 62 | // proportion of area of MDS plot to which the sum of default topic circle areas is set 63 | var circle_prop = 0.25; 64 | var word_prop = 0.25; 65 | 66 | // opacity of topic circles: 67 | var base_opacity = 0.2, 68 | highlight_opacity = 0.6; 69 | 70 | // topic/lambda selection names are specific to *this* vis 71 | var topic_select = to_select + "-topic"; 72 | var lambda_select = to_select + "-lambda"; 73 | 74 | // get rid of the # in the to_select (useful) for setting ID values 75 | var visID = to_select.replace("#", ""); 76 | var topicID = visID + "-topic"; 77 | var lambdaID = visID + "-lambda"; 78 | var termID = visID + "-term"; 79 | var topicDown = topicID + "-down"; 80 | var topicUp = topicID + "-up"; 81 | var topicClear = topicID + "-clear"; 82 | 83 | var leftPanelID = visID + "-leftpanel"; 84 | var barFreqsID = visID + "-bar-freqs"; 85 | var topID = visID + "-top"; 86 | var lambdaInputID = visID + "-lambdaInput"; 87 | var lambdaZeroID = visID + "-lambdaZero"; 88 | var sliderDivID = visID + "-sliderdiv"; 89 | var lambdaLabelID = visID + "-lamlabel"; 90 | 91 | ////////////////////////////////////////////////////////////////////////////// 92 | 93 | // sort array according to a specified object key name 94 | // Note that default is decreasing sort, set decreasing = -1 for increasing 95 | // adapted from http://stackoverflow.com/questions/16648076/sort-array-on-key-value 96 | function fancysort(key_name, decreasing) { 97 | decreasing = (typeof decreasing === "undefined") ? 1 : decreasing; 98 | return function(a, b) { 99 | if (a[key_name] < b[key_name]) 100 | return 1 * decreasing; 101 | if (a[key_name] > b[key_name]) 102 | return -1 * decreasing; 103 | return 0; 104 | }; 105 | } 106 | 107 | 108 | function visualize(data) { 109 | // set the number of topics to global variable K: 110 | K = data['mdsDat'].x.length; 111 | 112 | // R is the number of top relevant (or salient) words whose bars we display 113 | R = Math.min(data['R'], 30); 114 | 115 | // a (K x 5) matrix with columns x, y, topics, Freq, cluster (where x and y are locations for left panel) 116 | mdsData = []; 117 | for (var i = 0; i < K; i++) { 118 | var obj = {}; 119 | for (var key in data['mdsDat']) { 120 | obj[key] = data['mdsDat'][key][i]; 121 | } 122 | mdsData.push(obj); 123 | } 124 | 125 | // a huge matrix with 3 columns: Term, Topic, Freq, where Freq is all non-zero probabilities of topics given terms 126 | // for the terms that appear in the bar-charts for this data 127 | mdsData3 = []; 128 | for (var i = 0; i < data['token.table'].Term.length; i++) { 129 | var obj = {}; 130 | for (var key in data['token.table']) { 131 | obj[key] = data['token.table'][key][i]; 132 | } 133 | mdsData3.push(obj); 134 | }; 135 | 136 | // large data for the widths of bars in bar-charts. 
6 columns: Term, logprob, loglift, Freq, Total, Category 137 | // Contains all possible terms for topics in (1, 2, ..., k) and lambda in the user-supplied grid of lambda values 138 | // which defaults to (0, 0.01, 0.02, ..., 0.99, 1). 139 | lamData = []; 140 | for (var i = 0; i < data['tinfo'].Term.length; i++) { 141 | var obj = {}; 142 | for (var key in data['tinfo']) { 143 | obj[key] = data['tinfo'][key][i]; 144 | } 145 | lamData.push(obj); 146 | } 147 | var dat3 = lamData.slice(0, R); 148 | 149 | // Create the topic input & lambda slider forms. Inspired from: 150 | // http://bl.ocks.org/d3noob/10632804 151 | // http://bl.ocks.org/d3noob/10633704 152 | init_forms(topicID, lambdaID, visID); 153 | 154 | // When the value of lambda changes, update the visualization 155 | console.log('lambda_select', lambda_select); 156 | d3.select(lambda_select) 157 | .on("mouseup", function() { 158 | console.log('lambda_select mouseup'); 159 | // store the previous lambda value 160 | lambda.old = lambda.current; 161 | lambda.current = document.getElementById(lambdaID).value; 162 | vis_state.lambda = +this.value; 163 | // adjust the text on the range slider 164 | d3.select(lambda_select).property("value", vis_state.lambda); 165 | d3.select(lambda_select + "-value").text(vis_state.lambda); 166 | // transition the order of the bars 167 | var increased = lambda.old < vis_state.lambda; 168 | if (vis_state.topic > 0) reorder_bars(increased); 169 | // store the current lambda value 170 | state_save(true); 171 | document.getElementById(lambdaID).value = vis_state.lambda; 172 | }); 173 | 174 | d3.select("#" + topicUp) 175 | .on("click", function() { 176 | // remove term selection if it exists (from a saved URL) 177 | var termElem = document.getElementById(termID + vis_state.term); 178 | if (termElem !== undefined) term_off(termElem); 179 | vis_state.term = ""; 180 | var value_old = document.getElementById(topicID).value; 181 | var value_new = Math.min(K, +value_old + 1).toFixed(0); 182 | // increment the value in the input box 183 | document.getElementById(topicID).value = value_new; 184 | topic_off(document.getElementById(topicID + value_old)); 185 | topic_on(document.getElementById(topicID + value_new)); 186 | vis_state.topic = value_new; 187 | state_save(true); 188 | }); 189 | 190 | d3.select("#" + topicDown) 191 | .on("click", function() { 192 | // remove term selection if it exists (from a saved URL) 193 | var termElem = document.getElementById(termID + vis_state.term); 194 | if (termElem !== undefined) term_off(termElem); 195 | vis_state.term = ""; 196 | var value_old = document.getElementById(topicID).value; 197 | var value_new = Math.max(0, +value_old - 1).toFixed(0); 198 | // increment the value in the input box 199 | document.getElementById(topicID).value = value_new; 200 | topic_off(document.getElementById(topicID + value_old)); 201 | topic_on(document.getElementById(topicID + value_new)); 202 | vis_state.topic = value_new; 203 | state_save(true); 204 | }); 205 | 206 | d3.select("#" + topicID) 207 | .on("keyup", function() { 208 | // remove term selection if it exists (from a saved URL) 209 | var termElem = document.getElementById(termID + vis_state.term); 210 | if (termElem !== undefined) term_off(termElem); 211 | vis_state.term = ""; 212 | topic_off(document.getElementById(topicID + vis_state.topic)); 213 | var value_new = document.getElementById(topicID).value; 214 | if (!isNaN(value_new) && value_new > 0) { 215 | value_new = Math.min(K, Math.max(1, value_new)); 216 | 
topic_on(document.getElementById(topicID + value_new)); 217 | vis_state.topic = value_new; 218 | state_save(true); 219 | document.getElementById(topicID).value = vis_state.topic; 220 | } 221 | }); 222 | 223 | d3.select("#" + topicClear) 224 | .on("click", function() { 225 | state_reset(); 226 | state_save(true); 227 | }); 228 | 229 | // create linear scaling to pixels (and add some padding on outer region of scatterplot) 230 | var xrange = d3.extent(mdsData, function(d) { 231 | return d.x; 232 | }); //d3.extent returns min and max of an array 233 | var xdiff = xrange[1] - xrange[0], 234 | xpad = 0.05; 235 | var yrange = d3.extent(mdsData, function(d) { 236 | return d.y; 237 | }); 238 | var ydiff = yrange[1] - yrange[0], 239 | ypad = 0.05; 240 | 241 | if (xdiff > ydiff) { 242 | var xScale = d3.scaleLinear() 243 | .range([0, mdswidth]) 244 | .domain([xrange[0] - xpad * xdiff, xrange[1] + xpad * xdiff]); 245 | 246 | var yScale = d3.scaleLinear() 247 | .range([mdsheight, 0]) 248 | .domain([yrange[0] - 0.5*(xdiff - ydiff) - ypad*xdiff, yrange[1] + 0.5*(xdiff - ydiff) + ypad*xdiff]); 249 | } else { 250 | var xScale = d3.scaleLinear() 251 | .range([0, mdswidth]) 252 | .domain([xrange[0] - 0.5*(ydiff - xdiff) - xpad*ydiff, xrange[1] + 0.5*(ydiff - xdiff) + xpad*ydiff]); 253 | 254 | var yScale = d3.scaleLinear() 255 | .range([mdsheight, 0]) 256 | .domain([yrange[0] - ypad * ydiff, yrange[1] + ypad * ydiff]); 257 | } 258 | 259 | // Create new svg element (that will contain everything): 260 | var svg = d3.select(to_select).append("svg") 261 | .attr("width", mdswidth + barwidth + margin.left + termwidth + margin.right) 262 | .attr("height", mdsheight + 2 * margin.top + margin.bottom + 2 * rMax); 263 | 264 | // Create a group for the mds plot 265 | var mdsplot = svg.append("g") 266 | .attr("id", leftPanelID) 267 | .attr("class", "points") 268 | .attr("transform", "translate(" + margin.left + "," + 2 * margin.top + ")"); 269 | 270 | // Clicking on the mdsplot should clear the selection 271 | mdsplot.append("rect") 272 | .attr("x", 0) 273 | .attr("y", 0) 274 | .attr("height", mdsheight) 275 | .attr("width", mdswidth) 276 | .style("fill", color1) 277 | .attr("opacity", 0) 278 | .on("click", function() { 279 | state_reset(); 280 | state_save(true); 281 | }); 282 | 283 | mdsplot.append("line") // draw x-axis 284 | .attr("x1", 0) 285 | .attr("x2", mdswidth) 286 | .attr("y1", mdsheight / 2) 287 | .attr("y2", mdsheight / 2) 288 | .attr("stroke", "gray") 289 | .attr("opacity", 0.3); 290 | mdsplot.append("text") // label x-axis 291 | .attr("x", 0) 292 | .attr("y", mdsheight/2 - 5) 293 | .text(data['plot.opts'].xlab) 294 | .attr("fill", "gray"); 295 | 296 | mdsplot.append("line") // draw y-axis 297 | .attr("x1", mdswidth / 2) 298 | .attr("x2", mdswidth / 2) 299 | .attr("y1", 0) 300 | .attr("y2", mdsheight) 301 | .attr("stroke", "gray") 302 | .attr("opacity", 0.3); 303 | mdsplot.append("text") // label y-axis 304 | .attr("x", mdswidth/2 + 5) 305 | .attr("y", 7) 306 | .text(data['plot.opts'].ylab) 307 | .attr("fill", "gray"); 308 | 309 | // new definitions based on fixing the sum of the areas of the default topic circles: 310 | var newSmall = Math.sqrt(0.02*mdsarea*circle_prop/Math.PI); 311 | var newMedium = Math.sqrt(0.05*mdsarea*circle_prop/Math.PI); 312 | var newLarge = Math.sqrt(0.10*mdsarea*circle_prop/Math.PI); 313 | var cx = 10 + newLarge, 314 | cx2 = cx + 1.5 * newLarge; 315 | 316 | // circle guide inspired from 317 | // 
http://www.nytimes.com/interactive/2012/02/13/us/politics/2013-budget-proposal-graphic.html?_r=0 318 | var circleGuide = function(rSize, size) { 319 | d3.select("#" + leftPanelID).append("circle") 320 | .attr('class', "circleGuide" + size) 321 | .attr('r', rSize) 322 | .attr('cx', cx) 323 | .attr('cy', mdsheight + rSize) 324 | .style('fill', 'none') 325 | .style('stroke-dasharray', '2 2') 326 | .style('stroke', '#999'); 327 | d3.select("#" + leftPanelID).append("line") 328 | .attr('class', "lineGuide" + size) 329 | .attr("x1", cx) 330 | .attr("x2", cx2) 331 | .attr("y1", mdsheight + 2 * rSize) 332 | .attr("y2", mdsheight + 2 * rSize) 333 | .style("stroke", "gray") 334 | .style("opacity", 0.3); 335 | }; 336 | 337 | circleGuide(newSmall, "Small"); 338 | circleGuide(newMedium, "Medium"); 339 | circleGuide(newLarge, "Large"); 340 | 341 | var defaultLabelSmall = "2%"; 342 | var defaultLabelMedium = "5%"; 343 | var defaultLabelLarge = "10%"; 344 | 345 | d3.select("#" + leftPanelID).append("text") 346 | .attr("x", 10) 347 | .attr("y", mdsheight - 10) 348 | .attr('class', "circleGuideTitle") 349 | .style("text-anchor", "left") 350 | .style("fontWeight", "bold") 351 | .text("Marginal topic distribution"); 352 | d3.select("#" + leftPanelID).append("text") 353 | .attr("x", cx2 + 10) 354 | .attr("y", mdsheight + 2 * newSmall) 355 | .attr('class', "circleGuideLabelSmall") 356 | .style("text-anchor", "start") 357 | .text(defaultLabelSmall); 358 | d3.select("#" + leftPanelID).append("text") 359 | .attr("x", cx2 + 10) 360 | .attr("y", mdsheight + 2 * newMedium) 361 | .attr('class', "circleGuideLabelMedium") 362 | .style("text-anchor", "start") 363 | .text(defaultLabelMedium); 364 | d3.select("#" + leftPanelID).append("text") 365 | .attr("x", cx2 + 10) 366 | .attr("y", mdsheight + 2 * newLarge) 367 | .attr('class', "circleGuideLabelLarge") 368 | .style("text-anchor", "start") 369 | .text(defaultLabelLarge); 370 | 371 | // bind mdsData to the points in the left panel: 372 | var points = mdsplot.selectAll("points") 373 | .data(mdsData) 374 | .enter(); 375 | 376 | // text to indicate topic 377 | points.append("text") 378 | .attr("class", "txt") 379 | .attr("x", function(d) { 380 | return (xScale(+d.x)); 381 | }) 382 | .attr("y", function(d) { 383 | return (yScale(+d.y) + 4); 384 | }) 385 | .attr("stroke", "black") 386 | .attr("opacity", 1) 387 | .style("text-anchor", "middle") 388 | .style("font-size", "11px") 389 | .style("fontWeight", 100) 390 | .text(function(d) { 391 | return d.topics; 392 | }); 393 | 394 | // draw circles 395 | points.append("circle") 396 | .attr("class", "dot") 397 | .style("opacity", 0.2) 398 | .style("fill", color1) 399 | .attr("r", function(d) { 400 | return (Math.sqrt((d.Freq/100)*mdswidth*mdsheight*circle_prop/Math.PI)); 401 | }) 402 | .attr("cx", function(d) { 403 | return (xScale(+d.x)); 404 | }) 405 | .attr("cy", function(d) { 406 | return (yScale(+d.y)); 407 | }) 408 | .attr("stroke", "black") 409 | .attr("id", function(d) { 410 | return (topicID + d.topics); 411 | }) 412 | .on("mouseover", function(d) { 413 | var old_topic = topicID + vis_state.topic; 414 | if (vis_state.topic > 0 && old_topic != this.id) { 415 | topic_off(document.getElementById(old_topic)); 416 | } 417 | topic_on(this); 418 | }) 419 | .on("click", function(d) { 420 | // prevent click event defined on the div container from firing 421 | // http://bl.ocks.org/jasondavies/3186840 422 | d3.event.stopPropagation(); 423 | var old_topic = topicID + vis_state.topic; 424 | if (vis_state.topic > 0 && old_topic != 
this.id) { 425 | topic_off(document.getElementById(old_topic)); 426 | } 427 | // make sure topic input box value and fragment reflects clicked selection 428 | document.getElementById(topicID).value = vis_state.topic = d.topics; 429 | state_save(true); 430 | topic_on(this); 431 | }) 432 | .on("mouseout", function(d) { 433 | if (vis_state.topic != d.topics) topic_off(this); 434 | if (vis_state.topic > 0) topic_on(document.getElementById(topicID + vis_state.topic)); 435 | }); 436 | 437 | svg.append("text") 438 | .text("Intertopic Distance Map (via multidimensional scaling)") 439 | .attr("x", mdswidth/2 + margin.left) 440 | .attr("y", 30) 441 | .style("font-size", "16px") 442 | .style("text-anchor", "middle"); 443 | 444 | // establish layout and vars for bar chart 445 | var barDefault2 = dat3.filter(function(d) { 446 | return d.Category == "Default"; 447 | }); 448 | 449 | var y = d3.scaleBand() 450 | .domain(barDefault2.map(function(d) { 451 | return d.Term; 452 | })) 453 | .rangeRound([0, barheight]) 454 | .padding(0.15); 455 | 456 | var x = d3.scaleLinear() 457 | .domain([1, d3.max(barDefault2, function(d) { 458 | return d.Total; 459 | })]) 460 | .range([0, barwidth]) 461 | .nice(); 462 | var yAxis = d3.axisLeft(y); 463 | 464 | // Add a group for the bar chart 465 | var chart = svg.append("g") 466 | .attr("transform", "translate(" + +(mdswidth + margin.left + termwidth) + "," + 2 * margin.top + ")") 467 | .attr("id", barFreqsID); 468 | 469 | // bar chart legend/guide: 470 | var barguide = {"width": 100, "height": 15}; 471 | d3.select("#" + barFreqsID).append("rect") 472 | .attr("x", 0) 473 | .attr("y", mdsheight + 10) 474 | .attr("height", barguide.height) 475 | .attr("width", barguide.width) 476 | .style("fill", color1) 477 | .attr("opacity", 0.4); 478 | d3.select("#" + barFreqsID).append("text") 479 | .attr("x", barguide.width + 5) 480 | .attr("y", mdsheight + 10 + barguide.height/2) 481 | .style("dominant-baseline", "middle") 482 | .text("Overall term frequency"); 483 | 484 | d3.select("#" + barFreqsID).append("rect") 485 | .attr("x", 0) 486 | .attr("y", mdsheight + 10 + barguide.height + 5) 487 | .attr("height", barguide.height) 488 | .attr("width", barguide.width/2) 489 | .style("fill", color2) 490 | .attr("opacity", 0.8); 491 | d3.select("#" + barFreqsID).append("text") 492 | .attr("x", barguide.width/2 + 5) 493 | .attr("y", mdsheight + 10 + (3/2)*barguide.height + 5) 494 | .style("dominant-baseline", "middle") 495 | .text("Estimated term frequency within the selected topic"); 496 | 497 | // footnotes: 498 | d3.select("#" + barFreqsID) 499 | .append("a") 500 | .attr("xlink:href", "http://vis.stanford.edu/files/2012-Termite-AVI.pdf") 501 | .attr("target", "_blank") 502 | .append("text") 503 | .attr("x", 0) 504 | .attr("y", mdsheight + 10 + (6/2)*barguide.height + 5) 505 | .style("dominant-baseline", "middle") 506 | .text("1. saliency(term w) = frequency(w) * [sum_t p(t | w) * log(p(t | w)/p(t))] for topics t; see Chuang et. al (2012)"); 507 | d3.select("#" + barFreqsID) 508 | .append("a") 509 | .attr("xlink:href", "http://nlp.stanford.edu/events/illvi2014/papers/sievert-illvi2014.pdf") 510 | .attr("target", "_blank") 511 | .append("text") 512 | .attr("x", 0) 513 | .attr("y", mdsheight + 10 + (8/2)*barguide.height + 5) 514 | .style("dominant-baseline", "middle") 515 | .text("2. 
relevance(term w | topic t) = \u03BB * p(w | t) + (1 - \u03BB) * p(w | t)/p(w); see Sievert & Shirley (2014)"); 516 | 517 | // Bind 'default' data to 'default' bar chart 518 | var basebars = chart.selectAll(to_select + " .bar-totals") 519 | .data(barDefault2) 520 | .enter(); 521 | 522 | // Draw the gray background bars defining the overall frequency of each word 523 | basebars.append("rect") 524 | .attr("class", "bar-totals") 525 | .attr("x", 0) 526 | .attr("y", function(d) { 527 | return y(d.Term); 528 | }) 529 | .attr("height", y.bandwidth()) 530 | .attr("width", function(d) { 531 | return x(d.Total); 532 | }) 533 | .style("fill", color1) 534 | .attr("opacity", 0.4); 535 | 536 | // Add word labels to the side of each bar 537 | basebars.append("text") 538 | .attr("x", -5) 539 | .attr("class", "terms") 540 | .attr("y", function(d) { 541 | return y(d.Term) + 12; 542 | }) 543 | .attr("cursor", "pointer") 544 | .attr("id", function(d) { 545 | return (termID + d.Term); 546 | }) 547 | .style("text-anchor", "end") // right align text - use 'middle' for center alignment 548 | .text(function(d) { 549 | return d.Term; 550 | }) 551 | .on("mouseover", function() { 552 | term_hover(this); 553 | }) 554 | .on("mouseout", function() { 555 | vis_state.term = ""; 556 | term_off(this); 557 | state_save(true); 558 | }); 559 | 560 | var title = chart.append("text") 561 | .attr("x", barwidth/2) 562 | .attr("y", -30) 563 | .attr("class", "bubble-tool") // set class so we can remove it when highlight_off is called 564 | .style("text-anchor", "middle") 565 | .style("font-size", "16px") 566 | .text("Top-" + R + " Most Salient Terms"); 567 | 568 | title.append("tspan") 569 | .attr("baseline-shift", "super") 570 | .attr("font-size", "12px") 571 | .text("(1)"); 572 | 573 | // barchart axis adapted from http://bl.ocks.org/mbostock/1166403 574 | var xAxis = d3.axisTop(x) 575 | .tickSize(-barheight) 576 | .ticks(6); 577 | 578 | // dynamically create the topic and lambda input forms at the top of the page: 579 | function init_forms(topicID, lambdaID, visID) { 580 | 581 | // create container div for topic and lambda input: 582 | var inputDiv = document.createElement("div"); 583 | inputDiv.setAttribute("id", topID); 584 | inputDiv.setAttribute("style", "width: 1210px"); // to match the width of the main svg element 585 | document.getElementById(visID).appendChild(inputDiv); 586 | 587 | // topic input container: 588 | var topicDiv = document.createElement("div"); 589 | topicDiv.setAttribute("style", "padding: 5px; background-color: #e8e8e8; display: inline-block; width: " + mdswidth + "px; height: 50px; float: left"); 590 | inputDiv.appendChild(topicDiv); 591 | 592 | var topicLabel = document.createElement("label"); 593 | topicLabel.setAttribute("for", topicID); 594 | topicLabel.setAttribute("style", "font-family: sans-serif; font-size: 14px"); 595 | topicLabel.innerHTML = "Selected Topic: "; 596 | topicDiv.appendChild(topicLabel); 597 | 598 | var topicInput = document.createElement("input"); 599 | topicInput.setAttribute("style", "width: 50px"); 600 | topicInput.type = "text"; 601 | topicInput.min = "0"; 602 | topicInput.max = K; // assumes the data has already been read in 603 | topicInput.value = "0"; // a value of 0 indicates no topic is selected 604 | topicInput.step = "1"; 605 | topicInput.id = topicID; 606 | topicDiv.appendChild(topicInput); 607 | 608 | var previous = document.createElement("button"); 609 | previous.setAttribute("id", topicDown); 610 | previous.setAttribute("style", "margin-left: 5px"); 611 | 
previous.innerHTML = "Previous Topic"; 612 | topicDiv.appendChild(previous); 613 | 614 | var next = document.createElement("button"); 615 | next.setAttribute("id", topicUp); 616 | next.setAttribute("style", "margin-left: 5px"); 617 | next.innerHTML = "Next Topic"; 618 | topicDiv.appendChild(next); 619 | 620 | var clear = document.createElement("button"); 621 | clear.setAttribute("id", topicClear); 622 | clear.setAttribute("style", "margin-left: 5px"); 623 | clear.innerHTML = "Clear Topic"; 624 | topicDiv.appendChild(clear); 625 | 626 | // lambda inputs 627 | var lambdaDivWidth = barwidth; 628 | var lambdaDiv = document.createElement("div"); 629 | lambdaDiv.setAttribute("id", lambdaInputID); 630 | lambdaDiv.setAttribute("style", "padding: 5px; background-color: #e8e8e8; display: inline-block; height: 50px; width: " + lambdaDivWidth + "px; float: right; margin-right: 30px"); 631 | inputDiv.appendChild(lambdaDiv); 632 | 633 | var lambdaZero = document.createElement("div"); 634 | lambdaZero.setAttribute("style", "padding: 5px; height: 20px; width: 220px; font-family: sans-serif; float: left"); 635 | lambdaZero.setAttribute("id", lambdaZeroID); 636 | lambdaDiv.appendChild(lambdaZero); 637 | var xx = d3.select("#" + lambdaZeroID) 638 | .append("text") 639 | .attr("x", 0) 640 | .attr("y", 0) 641 | .style("font-size", "14px") 642 | .text("Slide to adjust relevance metric:"); 643 | var yy = d3.select("#" + lambdaZeroID) 644 | .append("text") 645 | .attr("x", 125) 646 | .attr("y", -5) 647 | .style("font-size", "10px") 648 | .style("position", "absolute") 649 | .text("(2)"); 650 | 651 | var sliderDiv = document.createElement("div"); 652 | sliderDiv.setAttribute("id", sliderDivID); 653 | sliderDiv.setAttribute("style", "padding: 5px; height: 40px; width: 250px; float: right; margin-top: -5px; margin-right: 10px"); 654 | lambdaDiv.appendChild(sliderDiv); 655 | 656 | var lambdaInput = document.createElement("input"); 657 | lambdaInput.setAttribute("style", "width: 250px; margin-left: 0px; margin-right: 0px"); 658 | lambdaInput.type = "range"; 659 | lambdaInput.min = 0; 660 | lambdaInput.max = 1; 661 | lambdaInput.step = data['lambda.step']; 662 | lambdaInput.value = vis_state.lambda; 663 | lambdaInput.id = lambdaID; 664 | lambdaInput.setAttribute("list", "ticks"); // to enable automatic ticks (with no labels, see below) 665 | sliderDiv.appendChild(lambdaInput); 666 | 667 | var lambdaLabel = document.createElement("label"); 668 | lambdaLabel.setAttribute("id", lambdaLabelID); 669 | lambdaLabel.setAttribute("for", lambdaID); 670 | lambdaLabel.setAttribute("style", "height: 20px; width: 60px; font-family: sans-serif; font-size: 14px; margin-left: 80px"); 671 | lambdaLabel.innerHTML = "λ = " + vis_state.lambda + ""; 672 | lambdaDiv.appendChild(lambdaLabel); 673 | 674 | // Create the svg to contain the slider scale: 675 | var scaleContainer = d3.select("#" + sliderDivID).append("svg") 676 | .attr("width", 250) 677 | .attr("height", 25); 678 | 679 | var sliderScale = d3.scaleLinear() 680 | .domain([0, 1]) 681 | .range([7.5, 242.5]) // trimmed by 7.5px on each side to match the input type=range slider: 682 | .nice(); 683 | 684 | // adapted from http://bl.ocks.org/mbostock/1166403 685 | var sliderAxis = d3.axisBottom(sliderScale) 686 | .tickSize(10) 687 | .ticks(6); 688 | 689 | // group to contain the elements of the slider axis: 690 | var sliderAxisGroup = scaleContainer.append("g") 691 | .attr("class", "slideraxis") 692 | .attr("margin-top", "-10px") 693 | .call(sliderAxis); 694 | } 695 | 696 | // function to 
re-order the bars (gray and red), and terms: 697 | function reorder_bars(increase) { 698 | // grab the bar-chart data for this topic only: 699 | var dat2 = lamData.filter(function(d) { 700 | return d.Category == "Topic" + vis_state.topic; 701 | }); 702 | // define relevance: 703 | for (var i = 0; i < dat2.length; i++) { 704 | dat2[i].relevance = vis_state.lambda * dat2[i].logprob + 705 | (1 - vis_state.lambda) * dat2[i].loglift; 706 | } 707 | 708 | // sort by relevance: 709 | dat2.sort(fancysort("relevance")); 710 | 711 | // truncate to the top R tokens: 712 | var dat3 = dat2.slice(0, R); 713 | 714 | var y = d3.scaleBand() 715 | .domain(dat3.map(function(d) { 716 | return d.Term; 717 | })) 718 | .rangeRound([0, barheight]) 719 | .padding(0.15); 720 | 721 | var x = d3.scaleLinear() 722 | .domain([1, d3.max(dat3, function(d) { 723 | return d.Total; 724 | })]) 725 | .range([0, barwidth]) 726 | .nice(); 727 | 728 | // Change Total Frequency bars 729 | var graybars = d3.select("#" + barFreqsID) 730 | .selectAll(to_select + " .bar-totals") 731 | .data(dat3, function(d) { 732 | return d.Term; 733 | }); 734 | 735 | // Change word labels 736 | var labels = d3.select("#" + barFreqsID) 737 | .selectAll(to_select + " .terms") 738 | .data(dat3, function(d) { 739 | return d.Term; 740 | }); 741 | 742 | // Create red bars (drawn over the gray ones) to signify the frequency under the selected topic 743 | var redbars = d3.select("#" + barFreqsID) 744 | .selectAll(to_select + " .overlay") 745 | .data(dat3, function(d) { 746 | return d.Term; 747 | }); 748 | 749 | // adapted from http://bl.ocks.org/mbostock/1166403 750 | var xAxis = d3.axisTop(x) 751 | .tickSize(-barheight) 752 | .ticks(6); 753 | 754 | // New axis definition: 755 | var newaxis = d3.selectAll(to_select + " .xaxis"); 756 | 757 | // define the new elements to enter: 758 | var graybarsEnter = graybars.enter().append("rect") 759 | .attr("class", "bar-totals") 760 | .attr("x", 0) 761 | .attr("y", function(d) { 762 | return y(d.Term) + barheight + margin.bottom + 2 * rMax; 763 | }) 764 | .attr("height", y.bandwidth()) 765 | .style("fill", color1) 766 | .attr("opacity", 0.4); 767 | 768 | var labelsEnter = labels.enter() 769 | .append("text") 770 | .attr("x", -5) 771 | .attr("class", "terms") 772 | .attr("y", function(d) { 773 | return y(d.Term) + 12 + barheight + margin.bottom + 2 * rMax; 774 | }) 775 | .attr("cursor", "pointer") 776 | .style("text-anchor", "end") 777 | .attr("id", function(d) { 778 | return (termID + d.Term); 779 | }) 780 | .text(function(d) { 781 | return d.Term; 782 | }) 783 | .on("mouseover", function() { 784 | term_hover(this); 785 | }) 786 | .on("mouseout", function() { 787 | vis_state.term = ""; 788 | term_off(this); 789 | state_save(true); 790 | }); 791 | 792 | var redbarsEnter = redbars.enter().append("rect") 793 | .attr("class", "overlay") 794 | .attr("x", 0) 795 | .attr("y", function(d) { 796 | return y(d.Term) + barheight + margin.bottom + 2 * rMax; 797 | }) 798 | .attr("height", y.bandwidth()) 799 | .style("fill", color2) 800 | .attr("opacity", 0.8); 801 | 802 | 803 | if (increase) { 804 | graybarsEnter 805 | .attr("width", function(d) { 806 | return x(d.Total); 807 | }) 808 | .transition().duration(duration) 809 | .delay(duration) 810 | .attr("y", function(d) { 811 | return y(d.Term); 812 | }); 813 | labelsEnter 814 | .transition().duration(duration) 815 | .delay(duration) 816 | .attr("y", function(d) { 817 | return y(d.Term) + 12; 818 | }); 819 | redbarsEnter 820 | .attr("width", function(d) { 821 | return x(d.Freq); 
822 | }) 823 | .transition().duration(duration) 824 | .delay(duration) 825 | .attr("y", function(d) { 826 | return y(d.Term); 827 | }); 828 | 829 | graybars.transition().duration(duration) 830 | .attr("width", function(d) { 831 | return x(d.Total); 832 | }) 833 | .transition().duration(duration) 834 | .attr("y", function(d) { 835 | return y(d.Term); 836 | }); 837 | labels.transition().duration(duration) 838 | .delay(duration) 839 | .attr("y", function(d) { 840 | return y(d.Term) + 12; 841 | }); 842 | redbars.transition().duration(duration) 843 | .attr("width", function(d) { 844 | return x(d.Freq); 845 | }) 846 | .transition().duration(duration) 847 | .attr("y", function(d) { 848 | return y(d.Term); 849 | }); 850 | 851 | // Transition exiting rectangles to the bottom of the barchart: 852 | graybars.exit() 853 | .transition().duration(duration) 854 | .attr("width", function(d) { 855 | return x(d.Total); 856 | }) 857 | .transition().duration(duration) 858 | .attr("y", function(d, i) { 859 | return barheight + margin.bottom + 6 + i * 18; 860 | }) 861 | .remove(); 862 | labels.exit() 863 | .transition().duration(duration) 864 | .delay(duration) 865 | .attr("y", function(d, i) { 866 | return barheight + margin.bottom + 18 + i * 18; 867 | }) 868 | .remove(); 869 | redbars.exit() 870 | .transition().duration(duration) 871 | .attr("width", function(d) { 872 | return x(d.Freq); 873 | }) 874 | .transition().duration(duration) 875 | .attr("y", function(d, i) { 876 | return barheight + margin.bottom + 6 + i * 18; 877 | }) 878 | .remove(); 879 | // https://github.com/mbostock/d3/wiki/Transitions#wiki-d3_ease 880 | newaxis.transition().duration(duration) 881 | .call(xAxis) 882 | .transition().duration(duration); 883 | } else { 884 | graybarsEnter 885 | .attr("width", 100) // FIXME by looking up old width of these bars 886 | .transition().duration(duration) 887 | .attr("y", function(d) { 888 | return y(d.Term); 889 | }) 890 | .transition().duration(duration) 891 | .attr("width", function(d) { 892 | return x(d.Total); 893 | }); 894 | labelsEnter 895 | .transition().duration(duration) 896 | .attr("y", function(d) { 897 | return y(d.Term) + 12; 898 | }); 899 | redbarsEnter 900 | .attr("width", 50) // FIXME by looking up old width of these bars 901 | .transition().duration(duration) 902 | .attr("y", function(d) { 903 | return y(d.Term); 904 | }) 905 | .transition().duration(duration) 906 | .attr("width", function(d) { 907 | return x(d.Freq); 908 | }); 909 | 910 | graybars.transition().duration(duration) 911 | .attr("y", function(d) { 912 | return y(d.Term); 913 | }) 914 | .transition().duration(duration) 915 | .attr("width", function(d) { 916 | return x(d.Total); 917 | }); 918 | labels.transition().duration(duration) 919 | .attr("y", function(d) { 920 | return y(d.Term) + 12; 921 | }); 922 | redbars.transition().duration(duration) 923 | .attr("y", function(d) { 924 | return y(d.Term); 925 | }) 926 | .transition().duration(duration) 927 | .attr("width", function(d) { 928 | return x(d.Freq); 929 | }); 930 | 931 | // Transition exiting rectangles to the bottom of the barchart: 932 | graybars.exit() 933 | .transition().duration(duration) 934 | .attr("y", function(d, i) { 935 | return barheight + margin.bottom + 6 + i * 18 + 2 * rMax; 936 | }) 937 | .remove(); 938 | labels.exit() 939 | .transition().duration(duration) 940 | .attr("y", function(d, i) { 941 | return barheight + margin.bottom + 18 + i * 18 + 2 * rMax; 942 | }) 943 | .remove(); 944 | redbars.exit() 945 | .transition().duration(duration) 946 | 
.attr("y", function(d, i) { 947 | return barheight + margin.bottom + 6 + i * 18 + 2 * rMax; 948 | }) 949 | .remove(); 950 | 951 | // https://github.com/mbostock/d3/wiki/Transitions#wiki-d3_ease 952 | newaxis.transition().duration(duration) 953 | .transition().duration(duration) 954 | .call(xAxis); 955 | } 956 | } 957 | 958 | ////////////////////////////////////////////////////////////////////////////// 959 | 960 | // function to update bar chart when a topic is selected 961 | // the circle argument should be the appropriate circle element 962 | function topic_on(circle) { 963 | if (circle == null) return null; 964 | 965 | // grab data bound to this element 966 | var d = circle.__data__; 967 | var Freq = Math.round(d.Freq * 10) / 10, 968 | topics = d.topics; 969 | 970 | // change opacity and fill of the selected circle 971 | circle.style.opacity = highlight_opacity; 972 | circle.style.fill = color2; 973 | 974 | // Remove 'old' bar chart title 975 | var text = d3.select(to_select + " .bubble-tool"); 976 | text.remove(); 977 | 978 | // append text with info relevant to topic of interest 979 | d3.select("#" + barFreqsID) 980 | .append("text") 981 | .attr("x", barwidth/2) 982 | .attr("y", -30) 983 | .attr("class", "bubble-tool") // set class so we can remove it when highlight_off is called 984 | .style("text-anchor", "middle") 985 | .style("font-size", "16px") 986 | .text("Top-" + R + " Most Relevant Terms for Topic " + topics + " (" + Freq + "% of tokens)"); 987 | 988 | // grab the bar-chart data for this topic only: 989 | var dat2 = lamData.filter(function(d) { 990 | return d.Category == "Topic" + topics; 991 | }); 992 | 993 | // define relevance: 994 | for (var i = 0; i < dat2.length; i++) { 995 | dat2[i].relevance = lambda.current * dat2[i].logprob + 996 | (1 - lambda.current) * dat2[i].loglift; 997 | } 998 | 999 | // sort by relevance: 1000 | dat2.sort(fancysort("relevance")); 1001 | 1002 | // truncate to the top R tokens: 1003 | var dat3 = dat2.slice(0, R); 1004 | 1005 | // scale the bars to the top R terms: 1006 | var y = d3.scaleBand() 1007 | .domain(dat3.map(function(d) { 1008 | return d.Term; 1009 | })) 1010 | .rangeRound([0, barheight]) 1011 | .padding(0.1); 1012 | 1013 | var x = d3.scaleLinear() 1014 | .domain([1, d3.max(dat3, function(d) { 1015 | return d.Total; 1016 | })]) 1017 | .range([0, barwidth]) 1018 | .nice(); 1019 | 1020 | // remove the red bars if there are any: 1021 | d3.selectAll(to_select + " .overlay").remove(); 1022 | 1023 | // Change Total Frequency bars 1024 | d3.selectAll(to_select + " .bar-totals") 1025 | .data(dat3) 1026 | .attr("x", 0) 1027 | .attr("y", function(d) { 1028 | return y(d.Term); 1029 | }) 1030 | .attr("height", y.bandwidth()) 1031 | .attr("width", function(d) { 1032 | return x(d.Total); 1033 | }) 1034 | .style("fill", color1) 1035 | .attr("opacity", 0.4); 1036 | 1037 | // Change word labels 1038 | d3.selectAll(to_select + " .terms") 1039 | .data(dat3) 1040 | .attr("x", -5) 1041 | .attr("y", function(d) { 1042 | return y(d.Term) + 12; 1043 | }) 1044 | .attr("id", function(d) { 1045 | return (termID + d.Term); 1046 | }) 1047 | .style("text-anchor", "end") // right align text - use 'middle' for center alignment 1048 | .text(function(d) { 1049 | return d.Term; 1050 | }); 1051 | 1052 | // Create red bars (drawn over the gray ones) to signify the frequency under the selected topic 1053 | d3.select("#" + barFreqsID).selectAll(to_select + " .overlay") 1054 | .data(dat3) 1055 | .enter() 1056 | .append("rect") 1057 | .attr("class", "overlay") 1058 | 
.attr("x", 0) 1059 | .attr("y", function(d) { 1060 | return y(d.Term); 1061 | }) 1062 | .attr("height", y.bandwidth()) 1063 | .attr("width", function(d) { 1064 | return x(d.Freq); 1065 | }) 1066 | .style("fill", color2) 1067 | .attr("opacity", 0.8); 1068 | 1069 | // adapted from http://bl.ocks.org/mbostock/1166403 1070 | var xAxis = d3.axisTop(x) 1071 | .tickSize(-barheight) 1072 | .ticks(6); 1073 | 1074 | // redraw x-axis 1075 | d3.selectAll(to_select + " .xaxis") 1076 | .call(xAxis); 1077 | } 1078 | 1079 | 1080 | function topic_off(circle) { 1081 | if (circle == null) return circle; 1082 | // go back to original opacity/fill 1083 | circle.style.opacity = base_opacity; 1084 | circle.style.fill = color1; 1085 | 1086 | var title = d3.selectAll(to_select + " .bubble-tool") 1087 | .text("Top-" + R + " Most Salient Terms"); 1088 | title.append("tspan") 1089 | .attr("baseline-shift", "super") 1090 | .attr("font-size", 12) 1091 | .text(1); 1092 | 1093 | // remove the red bars 1094 | d3.selectAll(to_select + " .overlay").remove(); 1095 | 1096 | // go back to 'default' bar chart 1097 | var dat2 = lamData.filter(function(d) { 1098 | return d.Category == "Default"; 1099 | }); 1100 | 1101 | var y = d3.scaleBand() 1102 | .domain(dat2.map(function(d) { 1103 | return d.Term; 1104 | })) 1105 | .rangeRound([0, barheight]) 1106 | .padding(0.15); 1107 | 1108 | var x = d3.scaleLinear() 1109 | .domain([1, d3.max(dat2, function(d) { 1110 | return d.Total; 1111 | })]) 1112 | .range([0, barwidth]) 1113 | .nice(); 1114 | 1115 | // Change Total Frequency bars 1116 | d3.selectAll(to_select + " .bar-totals") 1117 | .data(dat2) 1118 | .attr("x", 0) 1119 | .attr("y", function(d) { 1120 | return y(d.Term); 1121 | }) 1122 | .attr("height", y.bandwidth()) 1123 | .attr("width", function(d) { 1124 | return x(d.Total); 1125 | }) 1126 | .style("fill", color1) 1127 | .attr("opacity", 0.4); 1128 | 1129 | //Change word labels 1130 | d3.selectAll(to_select + " .terms") 1131 | .data(dat2) 1132 | .attr("x", -5) 1133 | .attr("y", function(d) { 1134 | return y(d.Term) + 12; 1135 | }) 1136 | .style("text-anchor", "end") // right align text - use 'middle' for center alignment 1137 | .text(function(d) { 1138 | return d.Term; 1139 | }); 1140 | 1141 | // adapted from http://bl.ocks.org/mbostock/1166403 1142 | var xAxis = d3.axisTop(x) 1143 | .tickSize(-barheight) 1144 | .ticks(6); 1145 | 1146 | // redraw x-axis 1147 | d3.selectAll(to_select + " .xaxis") 1148 | .attr("class", "xaxis") 1149 | .call(xAxis); 1150 | } 1151 | 1152 | // event definition for mousing over a term 1153 | function term_hover(term) { 1154 | var old_term = termID + vis_state.term; 1155 | if (vis_state.term != "" && old_term != term.id) { 1156 | term_off(document.getElementById(old_term)); 1157 | } 1158 | vis_state.term = term.innerHTML; 1159 | term_on(term); 1160 | state_save(true); 1161 | } 1162 | // updates vis when a term is selected via click or hover 1163 | function term_on(term) { 1164 | if (term == null) return null; 1165 | term.style["fontWeight"] = "bold"; 1166 | var d = term.__data__; 1167 | var Term = d.Term; 1168 | var dat2 = mdsData3.filter(function(d2) { 1169 | return d2.Term == Term; 1170 | }); 1171 | 1172 | var k = dat2.length; // number of topics for this token with non-zero frequency 1173 | 1174 | var radius = []; 1175 | for (var i = 0; i < K; ++i) { 1176 | radius[i] = 0; 1177 | } 1178 | for (i = 0; i < k; i++) { 1179 | radius[dat2[i].Topic - 1] = dat2[i].Freq; 1180 | } 1181 | 1182 | var size = []; 1183 | for (var i = 0; i < K; ++i) { 1184 | size[i] 
= 0; 1185 | } 1186 | for (i = 0; i < k; i++) { 1187 | // If we want to also re-size the topic number labels, do it here 1188 | // 11 is the default, so leaving this as 11 won't change anything. 1189 | size[dat2[i].Topic - 1] = 11; 1190 | } 1191 | 1192 | var rScaleCond = d3.scaleSqrt() 1193 | .domain([0, 1]).range([0, rMax]); 1194 | 1195 | // Change size of bubbles according to the word's distribution over topics 1196 | d3.selectAll(to_select + " .dot") 1197 | .data(radius) 1198 | .transition() 1199 | .attr("r", function(d) { 1200 | return (Math.sqrt(d*mdswidth*mdsheight*word_prop/Math.PI)); 1201 | }); 1202 | 1203 | // re-bind mdsData so we can handle multiple selection 1204 | d3.selectAll(to_select + " .dot") 1205 | .data(mdsData); 1206 | 1207 | // Change sizes of topic numbers: 1208 | d3.selectAll(to_select + " .txt") 1209 | .data(size) 1210 | .transition() 1211 | .style("font-size", function(d) { 1212 | return +d; 1213 | }); 1214 | 1215 | // Alter the guide 1216 | d3.select(to_select + " .circleGuideTitle") 1217 | .text("Conditional topic distribution given term = '" + term.innerHTML + "'"); 1218 | } 1219 | 1220 | function term_off(term) { 1221 | if (term == null) return null; 1222 | term.style["fontWeight"] = "normal"; 1223 | 1224 | d3.selectAll(to_select + " .dot") 1225 | .data(mdsData) 1226 | .transition() 1227 | .attr("r", function(d) { 1228 | return (Math.sqrt((d.Freq/100)*mdswidth*mdsheight*circle_prop/Math.PI)); 1229 | }); 1230 | 1231 | // Change sizes of topic numbers: 1232 | d3.selectAll(to_select + " .txt") 1233 | .transition() 1234 | .style("font-size", "11px"); 1235 | 1236 | // Go back to the default guide 1237 | d3.select(to_select + " .circleGuideTitle") 1238 | .text("Marginal topic distribution"); 1239 | d3.select(to_select + " .circleGuideLabelLarge") 1240 | .text(defaultLabelLarge); 1241 | d3.select(to_select + " .circleGuideLabelSmall") 1242 | .attr("y", mdsheight + 2 * newSmall) 1243 | .text(defaultLabelSmall); 1244 | d3.select(to_select + " .circleGuideSmall") 1245 | .attr("r", newSmall) 1246 | .attr("cy", mdsheight + newSmall); 1247 | d3.select(to_select + " .lineGuideSmall") 1248 | .attr("y1", mdsheight + 2 * newSmall) 1249 | .attr("y2", mdsheight + 2 * newSmall); 1250 | } 1251 | 1252 | 1253 | // serialize the visualization state using fragment identifiers -- http://en.wikipedia.org/wiki/Fragment_identifier 1254 | // location.hash holds the address information 1255 | 1256 | var params = location.hash.split("&"); 1257 | if (params.length > 1) { 1258 | vis_state.topic = params[0].split("=")[1]; 1259 | vis_state.lambda = params[1].split("=")[1]; 1260 | vis_state.term = params[2].split("=")[1]; 1261 | 1262 | // Idea: write a function to parse the URL string 1263 | // only accept values in [0,1] for lambda, {0, 1, ..., K} for topics (any string is OK for term) 1264 | // Allow for subsets of the three to be entered: 1265 | // (1) topic only (lambda = 1 term = "") 1266 | // (2) lambda only (topic = 0 term = "") visually the same but upon hovering a topic, the effect of lambda will be seen 1267 | // (3) term only (topic = 0 lambda = 1) only fires when the term is among the R most salient 1268 | // (4) topic + lambda (term = "") 1269 | // (5) topic + term (lambda = 1) 1270 | // (6) lambda + term (topic = 0) visually lambda doesn't make a difference unless a topic is hovered 1271 | // (7) topic + lambda + term 1272 | 1273 | // Short-term: assume format of "#topic=k&lambda=l&term=s" where k, l, and s are strings (b/c they're from a URL) 1274 | 1275 | // Force k (topic 
identifier) to be an integer between 0 and K: 1276 | vis_state.topic = Math.round(Math.min(K, Math.max(0, vis_state.topic))); 1277 | 1278 | // Force l (lambda identifier) to be in [0, 1]: 1279 | vis_state.lambda = Math.min(1, Math.max(0, vis_state.lambda)); 1280 | 1281 | // impose the value of lambda: 1282 | document.getElementById(lambdaID).value = vis_state.lambda; 1283 | document.getElementById(lambdaID + "-value").innerHTML = vis_state.lambda; 1284 | 1285 | // select the topic and transition the order of the bars (if appropriate) 1286 | if (!isNaN(vis_state.topic)) { 1287 | document.getElementById(topicID).value = vis_state.topic; 1288 | if (vis_state.topic > 0) { 1289 | topic_on(document.getElementById(topicID + vis_state.topic)); 1290 | } 1291 | if (vis_state.lambda < 1 && vis_state.topic > 0) { 1292 | reorder_bars(false); 1293 | } 1294 | } 1295 | lambda.current = vis_state.lambda; 1296 | var termElem = document.getElementById(termID + vis_state.term); 1297 | if (termElem !== undefined) term_on(termElem); 1298 | } 1299 | 1300 | function state_url() { 1301 | return location.origin + location.pathname + "#topic=" + vis_state.topic + 1302 | "&lambda=" + vis_state.lambda + "&term=" + vis_state.term; 1303 | } 1304 | 1305 | function state_save(replace) { 1306 | if (replace) 1307 | history.replaceState(vis_state, "Query", state_url()); 1308 | else 1309 | history.pushState(vis_state, "Query", state_url()); 1310 | } 1311 | 1312 | function state_reset() { 1313 | if (vis_state.topic > 0) { 1314 | topic_off(document.getElementById(topicID + vis_state.topic)); 1315 | } 1316 | if (vis_state.term != "") { 1317 | term_off(document.getElementById(termID + vis_state.term)); 1318 | } 1319 | vis_state.term = ""; 1320 | document.getElementById(topicID).value = vis_state.topic = 0; 1321 | state_save(true); 1322 | } 1323 | 1324 | } 1325 | 1326 | if (typeof data_or_file_name === 'string') 1327 | d3.json(data_or_file_name, function(error, data) {visualize(data);}); 1328 | else 1329 | visualize(data_or_file_name); 1330 | }; 1331 | -------------------------------------------------------------------------------- /pyLDAvis/js/ldavis.v1.0.0.css: -------------------------------------------------------------------------------- 1 | /* Taken from https://github.com/cpsievert/LDAvis */ 2 | /* Copyright 2013, AT&T Intellectual Property */ 3 | /* MIT Licence */ 4 | 5 | .slideraxis path { 6 | fill: none; 7 | stroke: none; 8 | } 9 | 10 | .xaxis .tick.major { 11 | fill: black; 12 | stroke: black; 13 | stroke-width: 0.1; 14 | opacity: 0.7; 15 | } 16 | 17 | .slideraxis { 18 | fill: black; 19 | stroke: black; 20 | stroke-width: 0.4; 21 | opacity: 1; 22 | } 23 | 24 | text { 25 | font-family: sans-serif; 26 | font-size: 11px; 27 | } 28 | 29 | /* IPython Notebook CSS to allow visualization to fit */ 30 | /* I'm open to a better way of accomplishing this goal... */ 31 | .container { width:1350px !important; } 32 | /* This is for nbviewer's benefit since the above wasn't enough... 
*/ 33 | .output_area { width:1450px !important; } 34 | -------------------------------------------------------------------------------- /pyLDAvis/lda_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | pyLDAvis lda_model 3 | ================== 4 | Helper functions to visualize sklearn's LatentDirichletAllocation models 5 | """ 6 | 7 | import funcy as fp 8 | import pyLDAvis 9 | import numpy as np 10 | 11 | 12 | def _get_doc_lengths(dtm): 13 | if isinstance(dtm, np.ndarray): 14 | return dtm.sum(axis=1).ravel() 15 | if isinstance(dtm, np.matrix): 16 | return dtm.sum(axis=1).getA1() 17 | raise TypeError(str(type(dtm))) 18 | 19 | 20 | def _get_term_freqs(dtm): 21 | if isinstance(dtm, np.ndarray): 22 | return dtm.sum(axis=0).ravel() 23 | if isinstance(dtm, np.matrix): 24 | return dtm.sum(axis=0).getA1() 25 | raise TypeError(str(type(dtm))) 26 | 27 | 28 | def _get_vocab(vectorizer): 29 | return vectorizer.get_feature_names_out() 30 | 31 | 32 | def _row_norm(dists): 33 | # row normalization function required 34 | # for doc_topic_dists and topic_term_dists 35 | return dists / dists.sum(axis=1)[:, None] 36 | 37 | 38 | def _get_doc_topic_dists(lda_model, dtm): 39 | return _row_norm(lda_model.transform(dtm)) 40 | 41 | 42 | def _get_topic_term_dists(lda_model): 43 | return _row_norm(lda_model.components_) 44 | 45 | 46 | def _extract_data(lda_model, dtm, vectorizer): 47 | vocab = _get_vocab(vectorizer) 48 | doc_lengths = _get_doc_lengths(dtm) 49 | term_freqs = _get_term_freqs(dtm) 50 | topic_term_dists = _get_topic_term_dists(lda_model) 51 | err_msg = ('Topic-term distributions and document-term matrix ' 52 | 'have different number of columns, {} != {}.') 53 | 54 | assert term_freqs.shape[0] == len(vocab), \ 55 | ('Term frequencies and vocabulary are of different sizes, {} != {}.' 56 | .format(term_freqs.shape[0], len(vocab))) 57 | 58 | assert topic_term_dists.shape[1] == dtm.shape[1], \ 59 | (err_msg.format(topic_term_dists.shape[1], dtm.shape[1])) 60 | 61 | # column dimensions of document-term matrix and topic-term distributions 62 | # must match first before transforming to document-topic distributions 63 | doc_topic_dists = _get_doc_topic_dists(lda_model, dtm) 64 | return {'vocab': vocab, 65 | 'doc_lengths': doc_lengths.tolist(), 66 | 'term_frequency': term_freqs.tolist(), 67 | 'doc_topic_dists': doc_topic_dists.tolist(), 68 | 'topic_term_dists': topic_term_dists.tolist()} 69 | 70 | 71 | def prepare(lda_model, dtm, vectorizer, **kwargs): 72 | """Create Prepared Data from sklearn's LatentDirichletAllocation and CountVectorizer. 73 | 74 | Parameters 75 | ---------- 76 | lda_model : sklearn.decomposition.LatentDirichletAllocation. 77 | Latent Dirichlet Allocation model from sklearn fitted with `dtm` 78 | 79 | dtm : array-like or sparse matrix, shape=(n_samples, n_features) 80 | Document-term matrix used to fit the LatentDirichletAllocation model (`lda_model`) 81 | 82 | vectorizer : sklearn.feature_extraction.text.(CountVectorizer, TfidfVectorizer).
83 |         vectorizer used to convert raw documents to document-term matrix (`dtm`)
84 |
85 |     **kwargs: Keyword arguments to be passed to pyLDAvis.prepare()
86 |
87 |
88 |     Returns
89 |     -------
90 |     prepared_data : PreparedData
91 |         the data structures used in the visualization
92 |
93 |
94 |     Example
95 |     --------
96 |     For example usage please see this notebook:
97 |     http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/LDA%20model.ipynb
98 |
99 |     See
100 |     ------
101 |     See `pyLDAvis.prepare` for **kwargs.
102 |     """
103 |     opts = fp.merge(_extract_data(lda_model, dtm, vectorizer), kwargs)
104 |     return pyLDAvis.prepare(**opts)
105 |
--------------------------------------------------------------------------------
/pyLDAvis/urls.py:
--------------------------------------------------------------------------------
1 | """
2 | LDAvis URLs
3 | ==========
4 | URLs and filepaths for the LDAvis javascript libraries
5 | """
6 |
7 | import os
8 | from . import __path__, __version__
9 |
10 | __all__ = ["D3_URL", "LDAVIS_URL", "LDAVIS_CSS_URL",
11 |            "D3_LOCAL", "LDAVIS_LOCAL", "LDAVIS_CSS_LOCAL"]
12 |
13 | D3_URL = "https://d3js.org/d3.v5.js"
14 |
15 | DEV = 'git' in __version__
16 | LOCAL_JS_DIR = os.path.join(__path__[0], "js")
17 | D3_LOCAL = os.path.join(LOCAL_JS_DIR, "d3.v5.min.js")
18 |
19 | # Avoid browser caching with @version in the URL.
20 | WWW_JS_DIR = "https://cdn.jsdelivr.net/gh/bmabey/pyLDAvis@{0}/pyLDAvis/js/".format(__version__)
21 |
22 | JS_VERSION = '1.0.0'
23 | if not DEV and int(__version__[0]) >= 3:
24 |     JS_VERSION = '3.0.0'
25 | CSS_VERSION = '1.0.0'
26 |
27 | LDAVIS_URL = WWW_JS_DIR + "ldavis.v{0}.js".format(JS_VERSION)
28 | LDAVIS_CSS_URL = WWW_JS_DIR + "ldavis.v{0}.css".format(CSS_VERSION)
29 |
30 | LDAVIS_LOCAL = os.path.join(LOCAL_JS_DIR, "ldavis.v{0}.js".format(JS_VERSION))
31 | LDAVIS_CSS_LOCAL = os.path.join(LOCAL_JS_DIR, "ldavis.v{0}.css".format(CSS_VERSION))
32 |
33 | if DEV:
34 |     LDAVIS_URL = WWW_JS_DIR + "ldavis.js"
35 |     LDAVIS_CSS_URL = WWW_JS_DIR + "ldavis.css"
36 |
37 |     LDAVIS_LOCAL = os.path.join(LOCAL_JS_DIR, "ldavis.js")
38 |     LDAVIS_CSS_LOCAL = os.path.join(LOCAL_JS_DIR, "ldavis.css")
39 |
--------------------------------------------------------------------------------
/pyLDAvis/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | pyLDAvis Utilities
3 | ===============
4 | Utility routines for the pyLDAvis package
5 | """
6 |
7 | import json
8 | import os
9 | import re
10 | import shutil
11 | import warnings
12 | import numpy as np
13 | import pyLDAvis.urls
14 |
15 | # Make sure that DeprecationWarning gets printed
16 | warnings.simplefilter("always", DeprecationWarning)
17 |
18 |
19 | def html_id_ok(objid, html5=False):
20 |     """Check whether objid is valid as an HTML id attribute.
21 |
22 |     If html5 == True, then use the more liberal html5 rules.
23 |     """
24 |     if html5:
25 |         return not re.search(r'\s', objid)
26 |     else:
27 |         return bool(re.match(r"^[a-zA-Z][a-zA-Z0-9\-\.\:\_]*$", objid))
28 |
29 |
30 | def get_id(obj, suffix="", prefix="el", warn_on_invalid=True):
31 |     """Get a unique id for the object"""
32 |     if not suffix:
33 |         suffix = ""
34 |     if not prefix:
35 |         prefix = ""
36 |
37 |     objid = prefix + str(os.getpid()) + str(id(obj)) + suffix
38 |
39 |     if warn_on_invalid and not html_id_ok(objid):
40 |         warnings.warn('"{0}" is not a valid html ID. This may cause problems'.format(objid))
41 |
42 |     return objid
43 |
44 |
45 | def write_ipynb_local_js(location=None, d3_src=None, ldavis_src=None, ldavis_css=None):
46 |     """
47 |     Write the pyLDAvis and d3 javascript libraries to the given file location.
48 |
49 |     This utility is used by the IPython notebook tools to enable easy use
50 |     of pyLDAvis with no web connection.
51 |
52 |     Parameters
53 |     ----------
54 |     location : string (optional)
55 |         the directory in which the d3 and pyLDAvis javascript libraries will be
56 |         written. If not specified, the IPython nbextensions directory will be
57 |         used. If IPython doesn't support nbextensions (< 2.0),
58 |         the current working directory will be used.
59 |     d3_src : string (optional)
60 |         the source location of the d3 library. If not specified, the standard
61 |         path in pyLDAvis.urls.D3_LOCAL will be used.
62 |     ldavis_src : string (optional)
63 |         the source location of the pyLDAvis library. If not specified, the
64 |         standard path in pyLDAvis.urls.LDAVIS_LOCAL will be used.
65 |
66 |     Returns
67 |     -------
68 |     d3_url, ldavis_url, ldavis_css_url : string
69 |         The URLs to be used for loading these js and css files.
70 |     """
71 |     nbextension = False
72 |     if location is None:
73 |         try:
74 |             # Later IPython versions
75 |             from notebook.nbextensions import install_nbextension
76 |             nbextension = True
77 |         except ImportError:
78 |             try:
79 |                 # Older IPython versions
80 |                 from IPython.html import install_nbextension
81 |                 nbextension = True
82 |             except ImportError:
83 |                 location = os.getcwd()
84 |
85 |     if d3_src is None:
86 |         d3_src = pyLDAvis.urls.D3_LOCAL
87 |     if ldavis_src is None:
88 |         ldavis_src = pyLDAvis.urls.LDAVIS_LOCAL
89 |     if ldavis_css is None:
90 |         ldavis_css = pyLDAvis.urls.LDAVIS_CSS_LOCAL
91 |
92 |     d3js = os.path.basename(d3_src)
93 |     ldavisjs = os.path.basename(ldavis_src)
94 |     ldaviscss = os.path.basename(ldavis_css)
95 |
96 |     if not os.path.exists(d3_src):
97 |         raise ValueError("d3 src not found at '{0}'".format(d3_src))
98 |     if not os.path.exists(ldavis_src):
99 |         raise ValueError("pyLDAvis src not found at '{0}'".format(ldavis_src))
100 |     if not os.path.exists(ldavis_css):
101 |         raise ValueError("pyLDAvis css not found at '{0}'".format(ldavis_css))
102 |
103 |     if nbextension:
104 |         # IPython 2.0+.
105 |         # This will not work if a url prefix is added
106 |         prefix = '/nbextensions/'
107 |
108 |         try:
109 |             [install_nbextension(ext) for ext in [d3_src, ldavis_src, ldavis_css]]
110 |         except IOError:
111 |             # files may be read only. We'll try deleting them and re-installing
112 |             from IPython.utils.path import get_ipython_dir
113 |             nbext = os.path.join(get_ipython_dir(), "nbextensions")
114 |
115 |             for src in [d3_src, ldavis_src]:
116 |                 dest = os.path.join(nbext, os.path.basename(src))
117 |                 if os.path.exists(dest):
118 |                     os.remove(dest)
119 |             [install_nbextension(ext) for ext in [d3_src, ldavis_src, ldavis_css]]
120 |
121 |     else:
122 |         # IPython < 2.0 or explicit path.
123 |         # This won't work if users have changed the kernel directory.
124 |         prefix = '/files/'
125 |
126 |         d3_dest = os.path.join(location, d3js)
127 |         ldavis_dest = os.path.join(location, ldavisjs)
128 |         ldavis_css_dest = os.path.join(location, ldaviscss)
129 |
130 |         for src, dest in [(d3_src, d3_dest),
131 |                           (ldavis_src, ldavis_dest),
132 |                           (ldavis_css, ldavis_css_dest)]:
133 |             try:
134 |                 shutil.copyfile(src, dest)
135 |             except IOError:
136 |                 # file may be read only. We'll try deleting it first
137 |                 if os.path.exists(dest):
138 |                     os.remove(dest)
139 |                 shutil.copyfile(src, dest)
140 |
141 |     return prefix + d3js, prefix + ldavisjs, prefix + ldaviscss
142 |
143 |
144 | class NumPyEncoder(json.JSONEncoder):
145 |     def default(self, obj):
146 |         if isinstance(obj, np.int64) or isinstance(obj, np.int32):
147 |             return int(obj)
148 |         if isinstance(obj, np.float64) or isinstance(obj, np.float32):
149 |             return float(obj)
150 |         return json.JSONEncoder.default(self, obj)
151 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools"]
3 | build-backend = "setuptools.build_meta"
4 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | pandas>=2.0.0
4 | joblib>=1.2.0
5 | jinja2
6 | numexpr
7 | funcy
8 | scikit-learn>=1.0.0
9 | gensim
10 | setuptools
11 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from setuptools import setup
4 |
5 | with open('README.rst') as readme_file:
6 |     readme = readme_file.read()
7 |
8 | with open('HISTORY.rst') as history_file:
9 |     history = history_file.read().replace('.. :changelog:', '')
10 |
11 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
12 | if on_rtd:
13 |     print('Being built on ReadTheDocs so we are avoiding pulling in scikit-bio since it imports numpy...')
14 |     requirements = []
15 | else:
16 |     with open('requirements.txt') as f:
17 |         requirements = f.read().splitlines()
18 |
19 | setup(
20 |     name='pyLDAvis',
21 |     version='3.4.1',
22 |     description='Interactive topic model visualization. Port of the R package.',
23 |     long_description_content_type="text/x-rst",
24 |     long_description=readme,
25 |     author='Ben Mabey',
26 |     author_email='ben@benmabey.com',
27 |     url='https://github.com/bmabey/pyLDAvis',
28 |     download_url='https://github.com/bmabey/pyLDAvis/tarball/3.4.1',
29 |     packages=['pyLDAvis'],
30 |     package_dir={'pyLDAvis': 'pyLDAvis'},
31 |     tests_require=['pytest'],
32 |     python_requires=">=3.9",
33 |     include_package_data=True,
34 |     install_requires=requirements,
35 |     license='BSD-3-Clause',
36 |     zip_safe=False,
37 |     keywords=['data science', 'visualization'],
38 |     classifiers=[
39 |         'Development Status :: 5 - Production/Stable',
40 |         'Intended Audience :: Developers',
41 |         'Intended Audience :: Science/Research',
42 |         'License :: OSI Approved :: BSD License',
43 |         'Natural Language :: English',
44 |         'Programming Language :: Python :: 3',
45 |         'Programming Language :: Python :: 3.9',
46 |         'Programming Language :: Python :: 3.10',
47 |         'Programming Language :: Python :: 3.11',
48 |     ]
49 | )
--------------------------------------------------------------------------------
/tests/data/.gitattributes:
--------------------------------------------------------------------------------
1 | movie_reviews_input.json filter=lfs diff=lfs merge=lfs -crlf
2 | movie_reviews_output.json filter=lfs diff=lfs merge=lfs -crlf
3 |
--------------------------------------------------------------------------------
/tests/data/export_data.R:
--------------------------------------------------------------------------------
1 | #!/usr/bin/Rscript
2 |
3 | ensure.packages <- function(packages) {
4 |   packages.not.installed <- Filter(function(p) !(p %in% installed.packages()), packages)
5 |   if(length(packages.not.installed) > 0) {
6 |     install.packages(packages.not.installed, dep = T)
7 | }}
8 |
9 | ensure.packages(c('LDAvis', 'LDAvisData', 'jsonlite'))
10 |
11 | library(LDAvis)
12 | library(LDAvisData)
13 | # RJSONIO did not roundtrip cleanly so it was annoying to use
14 | library(jsonlite)
15 |
16 | export <- function(data, name, out.dir='.') {
17 |   input.name <- paste0(name, "_input.json")
18 |   if(!file.exists(input.name))
19 |   {
20 |     cat(paste0('Exporting ', name, '...\n'))
21 |     input <- jsonlite::toJSON(data, digits=50)
22 |     cat(input, file = file.path(out.dir, input.name))
23 |   }
24 |
25 |   output.name <- paste0(name, "_output.json")
26 |   if(!file.exists(output.name))
27 |   {
28 |     # roundtrip the JSON so both libraries are using the same precision
29 |     data <- jsonlite::fromJSON(input)
30 |     output <- createJSON(data$phi, data$theta, data$doc.length, data$vocab, data$term.frequency)
31 |     cat(output, file = file.path(out.dir, output.name))
32 |     cat(paste0(input.name, ' and ', output.name, ' have been written.\n'))
33 |   }
34 | }
35 |
36 |
37 | export(AP, 'ap')
38 | export(Jeopardy, 'jeopardy')
39 | export(MovieReviews, 'movie_reviews')
40 |
--------------------------------------------------------------------------------
/tests/data/movie_reviews_input.json:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:0f8753ac5b6e89031fc56623e9f71a61ebaca0e3382956944ad05c0844580298
3 | size 7087084
4 |
--------------------------------------------------------------------------------
/tests/data/movie_reviews_output.json:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:777815470c09a2d852ee027047a9c0fff1d3683498e20c3c60b7dc10ef51cf8f
3 | size 159501
4 |
--------------------------------------------------------------------------------
/tests/pyLDAvis/test_gensim_models.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 |
5 | from gensim.models import LdaModel, HdpModel
6 | from gensim.corpora.dictionary import Dictionary
7 |
8 | import pyLDAvis
9 | import pyLDAvis.gensim_models as gensim_models
10 |
11 |
12 | def get_corpus_dictionary():
13 |     """Crafts a toy corpus and the associated dictionary."""
14 |     corpus = [
15 |         ['carrot', 'salad', 'tomato'],
16 |         ['carrot', 'salad', 'dish'],
17 |         ['tomato', 'dish'],
18 |         ['tomato', 'salad'],
19 |
20 |         ['car', 'break', 'highway'],
21 |         ['highway', 'accident', 'car'],
22 |         ['moto', 'break'],
23 |         ['accident', 'moto', 'car']
24 |     ]
25 |     dictionary = Dictionary(corpus)
26 |
27 |     # Transforming corpus with dictionary.
28 |     corpus = [dictionary.doc2bow(doc) for doc in corpus]
29 |
30 |     # Building reverse index.
31 |     for (token, uid) in dictionary.token2id.items():
32 |         dictionary.id2token[uid] = token
33 |
34 |     return corpus, dictionary
35 |
36 |
37 | def test_lda():
38 |     """Trains an LDA model and tests the html outputs."""
39 |     corpus, dictionary = get_corpus_dictionary()
40 |     lda = LdaModel(corpus=corpus, num_topics=2)
41 |
42 |     data = gensim_models.prepare(lda, corpus, dictionary)
43 |     pyLDAvis.save_html(data, 'index_lda.html')
44 |     os.remove('index_lda.html')
45 |
46 |
47 | def test_hdp():
48 |     """Trains an HDP model and tests the html outputs."""
49 |     corpus, dictionary = get_corpus_dictionary()
50 |     hdp = HdpModel(corpus, dictionary.id2token)
51 |
52 |     data = gensim_models.prepare(hdp, corpus, dictionary)
53 |     pyLDAvis.save_html(data, 'index_hdp.html')
54 |     os.remove('index_hdp.html')
55 |
56 |
57 | def test_sorted_terms():
58 |     """This tests that we can get the terms of a given topic using lambda
59 |     to calculate the relevance ranking. A common workflow is that once we
60 |     visualize the topics we modify the lambda slider and we are interested
61 |     in a particular lambda value, then with this function we can get the
62 |     terms in that order.
63 |     """
64 |     corpus, dictionary = get_corpus_dictionary()
65 |     lda = LdaModel(corpus=corpus, num_topics=2)
66 |
67 |     data = gensim_models.prepare(lda, corpus, dictionary)
68 |     # https://nlp.stanford.edu/events/illvi2014/papers/sievert-illvi2014.pdf
69 |     # lambda = 0 should rank the terms by loglift
70 |     # lambda = 1 should rank them by logprob.
71 |     sorted_terms = data.sorted_terms(topic=1, _lambda=1).to_dict()
72 |     assert (sorted_terms['logprob'] == sorted_terms['relevance'])
73 |     sorted_terms = data.sorted_terms(topic=1, _lambda=0).to_dict()
74 |     assert (sorted_terms['loglift'] == sorted_terms['relevance'])
75 |
76 |
77 | if __name__ == "__main__":
78 |     test_lda()
79 |     test_hdp()
80 |     test_sorted_terms()
81 |
--------------------------------------------------------------------------------
/tests/pyLDAvis/test_prepare.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import json
4 | import os.path as path
5 | import funcy as fp
6 | from numpy.testing import assert_array_equal
7 | import numpy as np
8 | import pandas as pd
9 | from pandas.testing import assert_frame_equal
10 |
11 | from pyLDAvis import prepare
12 |
13 | roundtrip = fp.compose(json.loads, lambda d: d.to_json(), prepare)
14 |
15 | DATA_DIR = path.join(path.dirname(path.realpath(__file__)), "../data/")
16 |
17 |
18 | def load_dataset(name):
19 |     with open(path.join(DATA_DIR, '%s_input.json' % name), 'r') as j:
20 |         data_input = json.load(j)
21 |
22 |     with open(path.join(DATA_DIR, '%s_output.json' % name), 'r') as j:
23 |         expected = json.load(j)
24 |
25 |     return data_input, expected
26 |
27 |
28 | def remove_col_suffixes(df):
29 |     df.columns = [w.split('_')[0] for w in df.columns]
30 |     return df
31 |
32 |
33 | def test_end_to_end_with_R_examples():
34 |     data_input, expected = load_dataset('movie_reviews')
35 |     output = roundtrip(topic_term_dists=data_input['phi'],
36 |                        doc_topic_dists=data_input['theta'],
37 |                        doc_lengths=data_input['doc.length'],
38 |                        vocab=data_input['vocab'],
39 |                        term_frequency=data_input['term.frequency'], R=30, lambda_step=0.01)
40 |
41 |     assert_array_equal(np.array(expected['topic.order']), np.array(output['topic.order']))
42 |
43 |     def both(f):
44 |         return f(expected), f(output)
45 |
46 |     assert set(expected['tinfo']['Category']) == set(output['tinfo']['Category'])
47 |     etinfo, otinfo = both(lambda d: pd.DataFrame(d['tinfo']))
48 |
49 |     eddf = etinfo.query('Category == "Default"')
50 |     eddf = eddf.reindex(sorted(eddf.columns), axis=1)
51 |
52 |     oddf = otinfo.query('Category == "Default"')
53 |     oddf = oddf.reindex(sorted(oddf.columns), axis=1)
54 |     assert_frame_equal(eddf, oddf)
55 |
56 |     joined = pd.merge(otinfo, etinfo, how='inner', on=['Term', 'Category'], suffixes=['_o', '_e'])
57 |     ejoined = remove_col_suffixes(joined[['Term', 'Category', 'Freq_e',
58 |                                           'Total_e', 'loglift_e', 'logprob_e']])
59 |     ojoined = remove_col_suffixes(joined[['Term', 'Category', 'Freq_o', 'Total_o',
60 |                                           'loglift_o', 'logprob_o']])
61 |
62 |     join_percent = float(len(joined)) / len(etinfo)
63 |     print('Topic Info join was %.0f%%' % (100 * join_percent))
64 |     assert_frame_equal(ejoined, ojoined, check_exact=False, rtol=0.1)
65 |     assert join_percent > 0.95
66 |
67 |     def abs_basis(df):
68 |         df.x = df.x.abs()
69 |         df.y = df.y.abs()
70 |         return df
71 |
72 |     emds, omds = both(lambda r: abs_basis(pd.DataFrame(r['mdsDat'])))
73 |     assert_frame_equal(emds.reindex(sorted(oddf.columns), axis=1),
74 |                        omds.reindex(sorted(oddf.columns), axis=1), check_exact=False, rtol=0.1)
75 |
76 |     def rounded_token_table(r):
77 |         tt = pd.DataFrame(r['token.table'])
78 |         tt.Freq = tt.Freq.round(5)
79 |         return tt
80 |     ett, ott = both(rounded_token_table)
81 |     joined = pd.DataFrame(pd.merge(ott, ett, on=['Freq', 'Term'],
82 |                                    suffixes=['_o', '_e'], how='inner')
83 |                           .groupby('Topic_o')['Topic_e'].value_counts())
84 |     joined.columns = ['count']
85 |     most_likely_map = joined.query('count > 100')
86 |     most_likely_map.index.names = ['Topic_o', 'Topic_e']
87 |     df = pd.DataFrame(most_likely_map).reset_index()
88 |     assert_array_equal(df['Topic_o'].values, df['Topic_e'].values)
89 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py39, py310, py311
3 |
4 | [testenv]
5 | install_command = pip3 install {opts} {packages}
6 | whitelist_externals = sh, pytest
7 | setenv = PYTHONPATH = {toxinidir}:{toxinidir}/pyLDAvis
8 | commands = pytest {posargs} # substitute with tox' positional arguments
9 | deps =
10 |     pytest
11 |     -r{toxinidir}/requirements.txt
--------------------------------------------------------------------------------
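
Usage sketch for pyLDAvis/lda_model.py. This is a minimal, assumption-laden example rather than code from the repository: the `docs` list, the two-topic model size, and the output filename `lda.html` are made up, and the document-term matrix is densified with `.toarray()` so it satisfies the ndarray/np.matrix checks in `_get_doc_lengths` and `_get_term_freqs`. Any extra keyword arguments to `prepare` are forwarded to `pyLDAvis.prepare`.

from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

import pyLDAvis
import pyLDAvis.lda_model

# Hypothetical toy corpus; substitute any list of raw documents.
docs = [
    "carrot salad tomato",
    "carrot salad dish",
    "tomato dish",
    "car break highway",
    "highway accident car",
    "accident moto car",
]

vectorizer = CountVectorizer()
dtm = vectorizer.fit_transform(docs).toarray()  # dense ndarray, as expected by the helpers above
lda = LatentDirichletAllocation(n_components=2, random_state=0).fit(dtm)

vis = pyLDAvis.lda_model.prepare(lda, dtm, vectorizer)
pyLDAvis.save_html(vis, "lda.html")  # or pyLDAvis.display(vis) inside a notebook

In a notebook, calling `pyLDAvis.enable_notebook()` once and then `pyLDAvis.display(vis)` renders the same prepared data inline instead of writing an HTML file.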
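
`write_ipynb_local_js` from pyLDAvis/utils.py can also be called directly when the bundled d3/LDAvis assets should live next to a notebook. A small sketch under the assumption that an explicit `location="."` (the current directory) is acceptable:

import pyLDAvis.utils

# Copy the packaged d3, ldavis js and ldavis css files into the current directory
# and get back the URLs a notebook server would use to load them.
d3_url, ldavis_url, ldavis_css_url = pyLDAvis.utils.write_ipynb_local_js(location=".")
print(d3_url, ldavis_url, ldavis_css_url)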
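
`NumPyEncoder` from pyLDAvis/utils.py is likewise usable on its own whenever NumPy scalar types end up in data destined for `json.dumps`; a short sketch with a hypothetical payload:

import json

import numpy as np

from pyLDAvis.utils import NumPyEncoder

payload = {"topic": np.int64(3), "weight": np.float32(0.25)}

# The stdlib encoder raises TypeError on NumPy scalars; NumPyEncoder converts them first.
print(json.dumps(payload, cls=NumPyEncoder))  # {"topic": 3, "weight": 0.25}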