├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── python-package.yml ├── .gitignore ├── .tool-versions ├── 2023-09-01 - reddit - depression dataset - etm - example.ipynb ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SECURITY.md ├── conftest.py ├── create_test_resources.py ├── dev_requirements.txt ├── embedded_topic_model ├── __init__.py ├── data │ └── 20ng │ │ ├── bow_tr_counts.mat │ │ ├── bow_tr_tokens.mat │ │ ├── bow_ts_counts.mat │ │ ├── bow_ts_h1_counts.mat │ │ ├── bow_ts_h1_tokens.mat │ │ ├── bow_ts_h2_counts.mat │ │ ├── bow_ts_h2_tokens.mat │ │ ├── bow_ts_tokens.mat │ │ ├── bow_va_counts.mat │ │ ├── bow_va_tokens.mat │ │ └── vocab.pkl ├── models │ ├── __init__.py │ ├── etm.py │ └── model.py ├── scripts │ ├── __init__.py │ └── datasets │ │ ├── 20ng │ │ ├── bow_tr_counts.mat │ │ ├── bow_tr_tokens.mat │ │ ├── bow_ts_counts.mat │ │ ├── bow_ts_h1_counts.mat │ │ ├── bow_ts_h1_tokens.mat │ │ ├── bow_ts_h2_counts.mat │ │ ├── bow_ts_h2_tokens.mat │ │ ├── bow_ts_tokens.mat │ │ ├── bow_va_counts.mat │ │ ├── bow_va_tokens.mat │ │ └── vocab.pkl │ │ ├── __init__.py │ │ ├── data_20ng.py │ │ ├── data_nyt.py │ │ ├── data_reddit_nouns_only.py │ │ ├── data_reddit_raw_pt.py │ │ └── stops.txt └── utils │ ├── __init__.py │ ├── data.py │ ├── embedding.py │ ├── metrics.py │ └── preprocessing.py ├── lint.sh ├── publish.txt ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── integration │ ├── __init__.py │ └── test_etm.py ├── resources │ ├── train_resources.test │ ├── train_w2v_embeddings.wordvectors │ ├── train_w2v_embeddings.wordvectors.bin │ └── train_w2v_embeddings.wordvectors.txt └── unit │ ├── __init__.py │ ├── test_embedding.py │ └── test_preprocessing.py └── train_resources.test /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/.github/ISSUE_TEMPLATE/bug_report.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/.github/ISSUE_TEMPLATE/feature_request.md -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/.github/workflows/python-package.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/.gitignore -------------------------------------------------------------------------------- /.tool-versions: -------------------------------------------------------------------------------- 1 | python 3.11.5 2 | -------------------------------------------------------------------------------- /2023-09-01 - reddit - depression dataset - etm - example.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/2023-09-01 - reddit - depression dataset - etm - example.ipynb -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/SECURITY.md -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /create_test_resources.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/create_test_resources.py -------------------------------------------------------------------------------- /dev_requirements.txt: -------------------------------------------------------------------------------- 1 | joblib 2 | flake8 3 | pytest>=7.4 4 | autopep8 5 | bump2version 6 | twine 7 | notebook 8 | -------------------------------------------------------------------------------- /embedded_topic_model/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.2.1' 2 | -------------------------------------------------------------------------------- /embedded_topic_model/data/20ng/bow_tr_counts.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/data/20ng/bow_tr_counts.mat -------------------------------------------------------------------------------- /embedded_topic_model/data/20ng/bow_tr_tokens.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/data/20ng/bow_tr_tokens.mat -------------------------------------------------------------------------------- /embedded_topic_model/data/20ng/bow_ts_counts.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/data/20ng/bow_ts_counts.mat -------------------------------------------------------------------------------- /embedded_topic_model/data/20ng/bow_ts_h1_counts.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/data/20ng/bow_ts_h1_counts.mat -------------------------------------------------------------------------------- /embedded_topic_model/data/20ng/bow_ts_h1_tokens.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/data/20ng/bow_ts_h1_tokens.mat -------------------------------------------------------------------------------- /embedded_topic_model/data/20ng/bow_ts_h2_counts.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/data/20ng/bow_ts_h2_counts.mat -------------------------------------------------------------------------------- /embedded_topic_model/data/20ng/bow_ts_h2_tokens.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/data/20ng/bow_ts_h2_tokens.mat -------------------------------------------------------------------------------- /embedded_topic_model/data/20ng/bow_ts_tokens.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/data/20ng/bow_ts_tokens.mat -------------------------------------------------------------------------------- /embedded_topic_model/data/20ng/bow_va_counts.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/data/20ng/bow_va_counts.mat -------------------------------------------------------------------------------- /embedded_topic_model/data/20ng/bow_va_tokens.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/data/20ng/bow_va_tokens.mat -------------------------------------------------------------------------------- /embedded_topic_model/data/20ng/vocab.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/data/20ng/vocab.pkl -------------------------------------------------------------------------------- /embedded_topic_model/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /embedded_topic_model/models/etm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/models/etm.py -------------------------------------------------------------------------------- /embedded_topic_model/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/models/model.py -------------------------------------------------------------------------------- /embedded_topic_model/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/20ng/bow_tr_counts.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/20ng/bow_tr_counts.mat -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/20ng/bow_tr_tokens.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/20ng/bow_tr_tokens.mat -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/20ng/bow_ts_counts.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/20ng/bow_ts_counts.mat -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/20ng/bow_ts_h1_counts.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/20ng/bow_ts_h1_counts.mat -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/20ng/bow_ts_h1_tokens.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/20ng/bow_ts_h1_tokens.mat -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/20ng/bow_ts_h2_counts.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/20ng/bow_ts_h2_counts.mat -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/20ng/bow_ts_h2_tokens.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/20ng/bow_ts_h2_tokens.mat -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/20ng/bow_ts_tokens.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/20ng/bow_ts_tokens.mat -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/20ng/bow_va_counts.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/20ng/bow_va_counts.mat -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/20ng/bow_va_tokens.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/20ng/bow_va_tokens.mat -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/20ng/vocab.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/20ng/vocab.pkl -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/data_20ng.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/data_20ng.py -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/data_nyt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/data_nyt.py -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/data_reddit_nouns_only.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/data_reddit_nouns_only.py -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/data_reddit_raw_pt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/data_reddit_raw_pt.py -------------------------------------------------------------------------------- /embedded_topic_model/scripts/datasets/stops.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/scripts/datasets/stops.txt -------------------------------------------------------------------------------- /embedded_topic_model/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /embedded_topic_model/utils/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/utils/data.py -------------------------------------------------------------------------------- /embedded_topic_model/utils/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/utils/embedding.py -------------------------------------------------------------------------------- /embedded_topic_model/utils/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/utils/metrics.py -------------------------------------------------------------------------------- /embedded_topic_model/utils/preprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/embedded_topic_model/utils/preprocessing.py -------------------------------------------------------------------------------- /lint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/lint.sh -------------------------------------------------------------------------------- /publish.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/publish.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/test_etm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/tests/integration/test_etm.py -------------------------------------------------------------------------------- /tests/resources/train_resources.test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/tests/resources/train_resources.test -------------------------------------------------------------------------------- /tests/resources/train_w2v_embeddings.wordvectors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/tests/resources/train_w2v_embeddings.wordvectors -------------------------------------------------------------------------------- /tests/resources/train_w2v_embeddings.wordvectors.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/tests/resources/train_w2v_embeddings.wordvectors.bin -------------------------------------------------------------------------------- /tests/resources/train_w2v_embeddings.wordvectors.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/tests/resources/train_w2v_embeddings.wordvectors.txt -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/test_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/tests/unit/test_embedding.py -------------------------------------------------------------------------------- /tests/unit/test_preprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/tests/unit/test_preprocessing.py -------------------------------------------------------------------------------- /train_resources.test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lfmatosm/embedded-topic-model/HEAD/train_resources.test --------------------------------------------------------------------------------