├── docs
├── _static
│ └── .gitignore
├── _templates
│ └── .gitignore
├── readme.rst
├── index.rst
├── Makefile
├── make.bat
└── conf.py
├── newsfragments
└── .gitignore
├── .gitattributes
├── streamlit_topic_modeling
├── tests
│ ├── test_app.py
│ └── __init__.py
├── __init__.py
└── app.py
├── Makefile
├── towncrier.toml
├── data
├── mf.png
├── Inkfree.ttf
├── favicon.png
├── Tweets.csv.zip
├── elonmusk.csv.zip
└── is-this-a-topic-modeling.jpg
├── setup.py
├── MANIFEST.in
├── pytest.ini
├── pyproject.toml
├── .idea
├── vcs.xml
├── other.xml
├── .gitignore
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
└── streamlit_topic_modeling.iml
├── Dockerfile
├── README.rst
├── tox.ini
├── .devcontainer
└── devcontainer.json
├── requirements.txt
├── setup.cfg
├── .gitignore
└── .editorconfig
/docs/_static/.gitignore:
--------------------------------------------------------------------------------
1 | !.gitignore
--------------------------------------------------------------------------------
/docs/_templates/.gitignore:
--------------------------------------------------------------------------------
1 | !.gitignore
--------------------------------------------------------------------------------
/newsfragments/.gitignore:
--------------------------------------------------------------------------------
1 | !.gitignore
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.png binary
2 | *.zip binary
--------------------------------------------------------------------------------
/docs/readme.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../README.rst
--------------------------------------------------------------------------------
/streamlit_topic_modeling/tests/test_app.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | sphinx-apidoc:
2 | sphinx-apidoc -f -o docs . setup.py
--------------------------------------------------------------------------------
/towncrier.toml:
--------------------------------------------------------------------------------
1 | [tool.towncrier]
2 | directory = "newsfragments"
--------------------------------------------------------------------------------
/data/mf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bpw1621/streamlit-topic-modeling/HEAD/data/mf.png
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""Package installation entry point; all project metadata lives in setup.cfg."""
from setuptools import setup

# Fix: 'setup_cfg' is not a recognized setup() keyword -- setuptools emits an
# "Unknown distribution option" warning and ignores it. setuptools reads
# setup.cfg automatically, so the declarative idiom is a bare setup() call.
setup()
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include pyproject.toml
3 | recursive-include data *.png *.zip *.ttf
--------------------------------------------------------------------------------
/data/Inkfree.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bpw1621/streamlit-topic-modeling/HEAD/data/Inkfree.ttf
--------------------------------------------------------------------------------
/data/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bpw1621/streamlit-topic-modeling/HEAD/data/favicon.png
--------------------------------------------------------------------------------
/streamlit_topic_modeling/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for the streamlit_topic_modeling package."""
2 |
--------------------------------------------------------------------------------
/data/Tweets.csv.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bpw1621/streamlit-topic-modeling/HEAD/data/Tweets.csv.zip
--------------------------------------------------------------------------------
/data/elonmusk.csv.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bpw1621/streamlit-topic-modeling/HEAD/data/elonmusk.csv.zip
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | minversion = 6.0
3 | addopts = -ra -q
4 | testpaths =
5 | streamlit_topic_modeling/tests
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools >= 51.1.1", "wheel"]
3 | build-backend = "setuptools.build_meta"
--------------------------------------------------------------------------------
/data/is-this-a-topic-modeling.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bpw1621/streamlit-topic-modeling/HEAD/data/is-this-a-topic-modeling.jpg
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/other.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.8
# NOTE(review): the maintainer value appears truncated by extraction (likely a
# stripped email address) -- left byte-identical.
LABEL maintainer="Bryan Patrick Wood "

WORKDIR /usr/src/app
# Fix: 'COPY .. .' points at the PARENT of the build context, which Docker
# rejects ("forbidden path outside the build context"); copy the context root.
COPY . .
RUN pip install -U pip && pip install --no-cache-dir -e .
EXPOSE 8501
# Exec form so the streamlit process receives signals (e.g. SIGTERM) directly
# rather than through an intermediate shell.
ENTRYPOINT ["streamlit", "run", "./streamlit_topic_modeling/app.py"]
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | README
2 | ******
3 |
4 | A topic modeling GUI application using Streamlit deployed on Streamlit Sharing `here `_.
5 |
6 | .. image:: ./data/is-this-a-topic-modeling.jpg
7 |
--------------------------------------------------------------------------------
/streamlit_topic_modeling/__init__.py:
--------------------------------------------------------------------------------
"""Top-level streamlit_topic_modeling package."""

import logging
from logging import NullHandler

# Package metadata.
__author__ = 'Bryan Patrick Wood'
__email__ = 'bpw1621@gmail.com'
__version__ = '0.0a0'

# Attach a NullHandler so applications that import this package without
# configuring logging do not get "no handlers" warnings (standard practice
# for library packages).
logging.getLogger(__name__).addHandler(NullHandler())
11 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | # tox (https://tox.readthedocs.io/) is a tool for running tests
2 | # in multiple virtualenvs. This configuration file will run the
3 | # test suite on all supported python versions. To use it, "pip install tox"
4 | # and then run "tox" from this directory.
5 |
6 | [tox]
7 | envlist = py36, py37, py38, py39
8 |
9 | [testenv]
10 | deps =
11 | pytest
12 | pytest-cov
13 | pytest-mock
14 | commands =
15 | pytest
16 |
--------------------------------------------------------------------------------
/.idea/streamlit_topic_modeling.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. Streamlit Topic Modeling documentation master file, created by
2 | sphinx-quickstart on Sat Jan 9 11:24:07 2021.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to Streamlit Topic Modeling's documentation!
7 | =========================================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 | :caption: Contents:
12 |
13 | readme
14 |
15 |
16 |
17 | Indices and tables
18 | ==================
19 |
20 | * :ref:`genindex`
21 | * :ref:`modindex`
22 | * :ref:`search`
23 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Python 3",
3 | // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
4 | "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
5 | "customizations": {
6 | "codespaces": {
7 | "openFiles": [
8 | "README.md",
9 | "streamlit_topic_modeling/app.py"
10 | ]
11 | },
12 | "vscode": {
13 | "settings": {},
14 | "extensions": [
15 | "ms-python.python",
16 | "ms-python.vscode-pylance"
17 | ]
18 | }
19 | },
20 | "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y 186 else 'white'
220 |
221 |
def perplexity_section():
    """Render the model perplexity as a Streamlit metric plus its formula.

    Reads the trained model and corpus from ``st.session_state`` and shows a
    delta against the value from the previous rerun when one exists.
    """
    with st.spinner('Calculating Perplexity ...'):
        perplexity = calculate_perplexity(st.session_state.model, st.session_state.corpus)
    key = 'previous_perplexity'
    # Fix: use fixed-point '.4f' like the displayed value and coherence_section;
    # the bare '.4' spec formats to 4 significant figures, rendering the delta
    # inconsistently with the metric value.
    delta = f'{perplexity - st.session_state[key]:.4f}' if key in st.session_state else None
    # delta_color='inverse': lower perplexity is better, so a decrease shows green.
    st.metric(label='Perplexity', value=f'{perplexity:.4f}', delta=delta, delta_color='inverse')
    st.session_state[key] = perplexity
    st.markdown('Viz., https://en.wikipedia.org/wiki/Perplexity')
    st.latex(r'Perplexity = \exp\left(-\frac{\sum_d \log(p(w_d|\Phi, \alpha))}{N}\right)')
231 |
232 |
def coherence_section():
    """Display the u_mass coherence score for the trained model.

    Shows a delta against the previous rerun's score when one is stored in
    ``st.session_state``, then renders the reference link and formula.
    """
    with st.spinner('Calculating Coherence Score ...'):
        coherence = calculate_coherence(st.session_state.model, st.session_state.corpus, 'u_mass')
    previous_key = 'previous_coherence_model_value'
    if previous_key in st.session_state:
        delta = f'{coherence - st.session_state[previous_key]:.4f}'
    else:
        delta = None
    st.metric(label='Coherence Score', value=f'{coherence:.4f}', delta=delta)
    st.session_state[previous_key] = coherence
    st.markdown('Viz., http://svn.aksw.org/papers/2015/WSDM_Topic_Evaluation/public.pdf')
    st.latex(
        r'C_{UMass} = \frac{2}{N \cdot (N - 1)}\sum_{i=2}^N\sum_{j=1}^{i-1}\log\frac{P(w_i, w_j) + \epsilon}{P(w_j)}')
243 |
244 |
@st.cache_data()
def train_projection(projection, n_components, df):
    """Fit the selected dimensionality-reduction model and project *df*.

    Cached by Streamlit so reruns with identical arguments reuse the result.
    Raises ``ValueError`` for an unrecognized projection name.
    """
    # Dispatch table instead of an if/elif chain; classes are only
    # instantiated after a successful lookup.
    projection_classes = {'PCA': PCA, 'T-SNE': TSNE, 'UMAP': UMAP}
    projection_class = projection_classes.get(projection)
    if projection_class is None:
        raise ValueError(f'Unknown projection: {projection}')
    return projection_class(n_components=n_components).fit_transform(df)
256 |
257 |
if __name__ == '__main__':
    st.set_page_config(page_title='Topic Modeling', page_icon='./data/favicon.png', layout='wide')

    # --- Sidebar forms -------------------------------------------------------
    preprocessing_options = st.sidebar.form('preprocessing-options')
    with preprocessing_options:
        st.header('Preprocessing Options')
        # NOTE(review): 'trigams' looks like a typo for 'trigrams', but the raw
        # string is forwarded to generate_docs() below -- confirm what that
        # helper matches on before renaming the option value.
        ngrams = st.selectbox('N-grams', [None, 'bigrams', 'trigams'], help='TODO ...')  # TODO ...
        st.form_submit_button('Preprocess')

    visualization_options = st.sidebar.form('visualization-options')
    with visualization_options:
        st.header('Visualization Options')
        collocations = st.checkbox('Enable WordCloud Collocations',
                                   help='Collocations in word clouds enable the display of phrases.')
        highlight_probability_minimum = st.select_slider('Highlight Probability Minimum',
                                                         options=[10 ** exponent for exponent in range(-10, 1)],
                                                         value=DEFAULT_HIGHLIGHT_PROBABILITY_MINIMUM,
                                                         help='Minimum topic probability in order to color highlight a word in the _Topic Highlighted Sentences_ visualization.')
        st.form_submit_button('Apply')

    # --- Introduction --------------------------------------------------------
    st.title('Topic Modeling')
    st.header('What is topic modeling?')
    with st.expander('Hero Image'):
        st.image('./data/is-this-a-topic-modeling.jpg', caption='No ... no it\'s not ...', use_column_width=True)
    st.markdown(
        'Topic modeling is a broad term. It encompasses a number of specific statistical learning methods. '
        'These methods do the following: explain documents in terms of a set of topics and those topics in terms of '
        'the a set of words. Two very commonly used methods are Latent Dirichlet Allocation (LDA) and Non-Negative '
        'Matrix Factorization (NMF), for instance. Used without additional qualifiers the approach is usually assumed '
        'to be unsupervised although there are semi-supervised and supervised variants.'
    )

    with st.expander('Additional Details'):
        st.markdown('The objective can be viewed as a matrix factorization.')
        st.image('./data/mf.png', use_column_width=True)
        st.markdown('This factorization makes the methods much more efficient than directly characterizing documents '
                    'in term of words.')
        st.markdown('More information on LDA and NMF can be found at '
                    'https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation and '
                    'https://en.wikipedia.org/wiki/Non-negative_matrix_factorization, respectively.')

    # --- Dataset selection ---------------------------------------------------
    st.header('Datasets')
    st.markdown('Preloaded a couple of small example datasets to illustrate.')
    selected_dataset = st.selectbox('Dataset', [None, *sorted(list(DATASETS.keys()))], on_change=clear_session_state)
    if not selected_dataset:
        # Fix: corrected the 'Conintue' typo in this user-facing message.
        st.write('Choose a Dataset to Continue ...')
        st.stop()

    with st.expander('Dataset Description'):
        st.markdown(DATASETS[selected_dataset]['description'])
        st.markdown(DATASETS[selected_dataset]['url'])

    text_column = DATASETS[selected_dataset]['column']
    texts_df = generate_texts_df(selected_dataset)
    docs = generate_docs(texts_df, text_column, ngrams=ngrams)

    with st.expander('Sample Documents'):
        sample_texts = texts_df[text_column].sample(5).values.tolist()
        for index, text in enumerate(sample_texts):
            st.markdown(f'**{index + 1}**: _{text}_')

    with st.expander('Frequency Sized Corpus Wordcloud'):
        wc = generate_wordcloud(docs)
        st.image(wc.to_image(), caption='Dataset Wordcloud (Not A Topic Model)', use_column_width=True)
        st.markdown('These are the remaining words after document preprocessing.')

    with st.expander('Document Word Count Distribution'):
        len_docs = [len(doc) for doc in docs]
        fig, ax = plt.subplots()
        sns.histplot(data=pd.DataFrame(len_docs, columns=['Words In Document']), discrete=True, ax=ax)
        st.pyplot(fig)

    # --- Model selection and training ----------------------------------------
    model_key = st.sidebar.selectbox('Model', [None, *list(MODELS.keys())], on_change=clear_session_state)
    model_options = st.sidebar.form('model-options')
    if not model_key:
        with st.sidebar:
            st.write('Choose a Model to Continue ...')
        st.stop()
    with model_options:
        st.header('Model Options')
        model_kwargs = MODELS[model_key]['options']()
        st.session_state['model_kwargs'] = model_kwargs
        train_model_clicked = st.form_submit_button('Train Model')

    if train_model_clicked:
        with st.spinner('Training Model ...'):
            id2word, corpus, model = train_model(docs, MODELS[model_key]['class'], **st.session_state.model_kwargs)
            st.session_state.id2word = id2word
            st.session_state.corpus = corpus
            st.session_state.model = model

    # Nothing below makes sense until a model has been trained at least once.
    if 'model' not in st.session_state:
        st.stop()

    st.header('Model')
    st.write(type(st.session_state.model).__name__)
    st.write(st.session_state.model_kwargs)

    st.header('Model Results')

    topics = st.session_state.model.show_topics(formatted=False, num_words=50,
                                                num_topics=st.session_state.model_kwargs['num_topics'], log=False)
    with st.expander('Topic Word-Weighted Summaries'):
        topic_summaries = {}
        for topic in topics:
            topic_index = topic[0]
            topic_word_weights = topic[1]
            topic_summaries[topic_index] = ' + '.join(
                f'{weight:.3f} * {word}' for word, weight in topic_word_weights[:10])
        for topic_index, topic_summary in topic_summaries.items():
            st.markdown(f'**Topic {topic_index}**: _{topic_summary}_')

    # One distinct color per topic, shared by the wordclouds and the
    # highlighted-sentences view below.
    colors = random.sample(COLORS, k=model_kwargs['num_topics'])
    with st.expander('Top N Topic Keywords Wordclouds'):
        cols = st.columns(3)
        for index, topic in enumerate(topics):
            wc = WordCloud(font_path=WORDCLOUD_FONT_PATH, width=700, height=600,
                           background_color='white', collocations=collocations, prefer_horizontal=1.0,
                           # Fix: bind the loop variable as a default argument; a
                           # late-binding closure would see the loop's final index
                           # if the color function were ever invoked after this
                           # iteration completes.
                           color_func=lambda *args, index=index, **kwargs: colors[index])
            with cols[index % 3]:
                wc.generate_from_frequencies(dict(topic[1]))
                st.image(wc.to_image(), caption=f'Topic #{index}', use_column_width=True)

    with st.expander('Topic Highlighted Sentences'):
        sample = texts_df.sample(10)
        for index, row in sample.iterrows():
            html_elements = []
            for token in row[text_column].split():
                if st.session_state.id2word.token2id.get(token) is None:
                    # NOTE(review): this f-string (and the highlighted one below)
                    # appears to have lost HTML markup during extraction; as
                    # written it appends only the bare token.
                    html_elements.append(f'{token}')
                else:
                    term_topics = st.session_state.model.get_term_topics(token, minimum_probability=0)
                    topic_probabilities = [term_topic[1] for term_topic in term_topics]
                    max_topic_probability = max(topic_probabilities) if topic_probabilities else 0
                    if max_topic_probability < highlight_probability_minimum:
                        html_elements.append(token)
                    else:
                        max_topic_index = topic_probabilities.index(max_topic_probability)
                        max_topic = term_topics[max_topic_index]
                        background_color = colors[max_topic[0]]
                        # color = 'white'
                        color = white_or_black_text(background_color)
                        html_elements.append(
                            f'{token}')
            st.markdown(f'Document #{index}: {" ".join(html_elements)}', unsafe_allow_html=True)

    # --- Metrics -------------------------------------------------------------
    has_log_perplexity = hasattr(st.session_state.model, 'log_perplexity')
    with st.expander('Metrics'):
        if has_log_perplexity:
            left_column, right_column = st.columns(2)
            with left_column:
                perplexity_section()
            with right_column:
                coherence_section()
        else:
            coherence_section()

    # --- Low dimensional projections -----------------------------------------
    with st.expander('Low Dimensional Projections'):
        with st.form('projections-form'):
            left_column, right_column = st.columns(2)
            projection = left_column.selectbox('Projection', ['PCA', 'T-SNE', 'UMAP'], help='TODO ...')
            plot_type = right_column.selectbox('Plot', ['2D', '3D'], help='TODO ...')
            n_components = 3
            columns = [f'proj{i}' for i in range(1, 4)]
            generate_projection_clicked = st.form_submit_button('Generate Projection')

        if generate_projection_clicked:
            # Densify the sparse (topic, weight) assignments into a
            # documents x topics matrix.
            topic_weights = []
            for index, topic_weight in enumerate(st.session_state.model[st.session_state.corpus]):
                weight_vector = [0] * int(st.session_state.model_kwargs['num_topics'])
                for topic, weight in topic_weight:
                    weight_vector[topic] = weight
                topic_weights.append(weight_vector)
            df = pd.DataFrame(topic_weights)
            dominant_topic = df.idxmax(axis='columns').astype('string')
            dominant_topic_percentage = df.max(axis='columns')
            df = df.assign(dominant_topic=dominant_topic, dominant_topic_percentage=dominant_topic_percentage,
                           text=texts_df[text_column])
            with st.spinner('Training Projection'):
                projections = train_projection(projection, n_components, df.drop(columns=['dominant_topic', 'dominant_topic_percentage', 'text']).add_prefix('topic_'))
            data = pd.concat([df, pd.DataFrame(projections, columns=columns)], axis=1)

            px_options = {'color': 'dominant_topic', 'size': 'dominant_topic_percentage',
                          'hover_data': ['dominant_topic', 'dominant_topic_percentage', 'text']}
            if plot_type == '2D':
                # Show all three pairwise 2D projections.
                fig = px.scatter(data, x='proj1', y='proj2', **px_options)
                st.plotly_chart(fig)
                fig = px.scatter(data, x='proj1', y='proj3', **px_options)
                st.plotly_chart(fig)
                fig = px.scatter(data, x='proj2', y='proj3', **px_options)
                st.plotly_chart(fig)
            elif plot_type == '3D':
                fig = px.scatter_3d(data, x='proj1', y='proj2', z='proj3', **px_options)
                st.plotly_chart(fig)

    if hasattr(st.session_state.model, 'inference'):  # gensim Nmf has no 'inference' attribute so pyLDAvis fails
        if st.button('Generate pyLDAvis'):
            with st.spinner('Creating pyLDAvis Visualization ...'):
                py_lda_vis_data = pyLDAvis.gensim_models.prepare(st.session_state.model, st.session_state.corpus,
                                                                 st.session_state.id2word)
                py_lda_vis_html = pyLDAvis.prepared_data_to_html(py_lda_vis_data)
            with st.expander('pyLDAvis', expanded=True):
                st.markdown('pyLDAvis is designed to help users interpret the topics in a topic model that has been '
                            'fit to a corpus of text data. The package extracts information from a fitted LDA topic '
                            'model to inform an interactive web-based visualization.')
                st.markdown('https://github.com/bmabey/pyLDAvis')
                components.html(py_lda_vis_html, width=1300, height=800)
465 |
--------------------------------------------------------------------------------