├── .coveragerc ├── .github └── workflows │ └── test_package.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── api ├── __init__.py ├── api.py ├── final_svc.sav ├── requirements.txt ├── schemas.py └── test_api_locally.py ├── coverage.xml ├── current_best_model ├── final_bert │ ├── bert_perf.xlsx │ └── bert_summary.txt ├── final_ensemble │ ├── ensemble_perf.xlsx │ └── ensemble_summary.txt ├── final_svc │ ├── final_svc.sav │ ├── final_svc_perf.xlsx │ └── final_svc_summary.txt ├── final_xgb │ ├── final_xgb.sav │ ├── final_xgb_perf.xlsx │ └── final_xgb_summary.txt └── sentiment │ ├── bert_sentiment.txt │ ├── confusion_matrix_3_counts.png │ ├── confusion_matrix_3_percentages.png │ ├── confusion_matrix_5_counts.png │ └── confusion_matrix_5_percentages.png ├── datasets ├── README.md ├── phase_1 │ ├── README.md │ ├── co.csv │ ├── co_multi_label.csv │ └── text_data.csv ├── testing │ └── test_data.csv └── v6framework_230831.csv ├── docker_README.md ├── docker_data ├── data_in │ ├── file_01.json │ └── file_02.json └── data_out │ └── file_01.json ├── docker_run.py ├── docs ├── about.md ├── create_docs.py ├── getting started │ ├── install.md │ ├── package.md │ ├── training_new_model.md │ └── using_trained_model.md ├── index.md ├── main.css └── reference │ ├── API │ ├── API.md │ ├── quick_API.md │ └── slow_API.md │ ├── Docker │ └── docker_README.md │ └── pxtextmining │ ├── factories │ ├── factory_data_load_and_split.md │ ├── factory_model_performance.md │ ├── factory_pipeline.md │ ├── factory_predict_unlabelled_text.md │ └── factory_write_results.md │ ├── helpers │ └── text_preprocessor.md │ └── pipelines │ ├── multilabel_pipeline.md │ └── sentiment_pipeline.md ├── mkdocs.yml ├── poetry.lock ├── pxtextmining ├── __init__.py ├── factories │ ├── __init__.py │ ├── factory_data_load_and_split.py │ ├── factory_model_performance.py │ ├── factory_pipeline.py │ ├── factory_predict_unlabelled_text.py │ └── factory_write_results.py ├── helpers │ ├── __init__.py │ └── text_preprocessor.py ├── params.py └── pipelines │ ├── __init__.py │ ├── multilabel_pipeline.py │ └── sentiment_pipeline.py ├── pyproject.toml ├── setup.py ├── test_multilabel └── dummy_metrics.txt └── tests ├── __init__.py ├── conftest.py ├── test_api.py ├── test_data_load_and_split.py ├── test_docker_run.py ├── test_factory_pipeline.py ├── test_helpers.py ├── test_model_performance.py ├── test_multilabel_pipeline.py ├── test_predict_unlabelled_text.py ├── test_sentiment_pipeline.py └── test_write_results.py /.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/.coveragerc -------------------------------------------------------------------------------- /.github/workflows/test_package.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/.github/workflows/test_package.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/README.md -------------------------------------------------------------------------------- /api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /api/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/api/api.py -------------------------------------------------------------------------------- /api/final_svc.sav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/api/final_svc.sav -------------------------------------------------------------------------------- /api/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/api/requirements.txt -------------------------------------------------------------------------------- /api/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/api/schemas.py -------------------------------------------------------------------------------- /api/test_api_locally.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/api/test_api_locally.py -------------------------------------------------------------------------------- /coverage.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/coverage.xml -------------------------------------------------------------------------------- /current_best_model/final_bert/bert_perf.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/final_bert/bert_perf.xlsx -------------------------------------------------------------------------------- /current_best_model/final_bert/bert_summary.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/final_bert/bert_summary.txt -------------------------------------------------------------------------------- /current_best_model/final_ensemble/ensemble_perf.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/final_ensemble/ensemble_perf.xlsx -------------------------------------------------------------------------------- /current_best_model/final_ensemble/ensemble_summary.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/final_ensemble/ensemble_summary.txt -------------------------------------------------------------------------------- /current_best_model/final_svc/final_svc.sav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/final_svc/final_svc.sav -------------------------------------------------------------------------------- /current_best_model/final_svc/final_svc_perf.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/final_svc/final_svc_perf.xlsx -------------------------------------------------------------------------------- /current_best_model/final_svc/final_svc_summary.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/final_svc/final_svc_summary.txt -------------------------------------------------------------------------------- /current_best_model/final_xgb/final_xgb.sav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/final_xgb/final_xgb.sav -------------------------------------------------------------------------------- /current_best_model/final_xgb/final_xgb_perf.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/final_xgb/final_xgb_perf.xlsx -------------------------------------------------------------------------------- /current_best_model/final_xgb/final_xgb_summary.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/final_xgb/final_xgb_summary.txt -------------------------------------------------------------------------------- /current_best_model/sentiment/bert_sentiment.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/sentiment/bert_sentiment.txt -------------------------------------------------------------------------------- /current_best_model/sentiment/confusion_matrix_3_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/sentiment/confusion_matrix_3_counts.png -------------------------------------------------------------------------------- /current_best_model/sentiment/confusion_matrix_3_percentages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/sentiment/confusion_matrix_3_percentages.png -------------------------------------------------------------------------------- /current_best_model/sentiment/confusion_matrix_5_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/sentiment/confusion_matrix_5_counts.png -------------------------------------------------------------------------------- /current_best_model/sentiment/confusion_matrix_5_percentages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/current_best_model/sentiment/confusion_matrix_5_percentages.png -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/datasets/README.md -------------------------------------------------------------------------------- /datasets/phase_1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/datasets/phase_1/README.md -------------------------------------------------------------------------------- /datasets/phase_1/co.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/datasets/phase_1/co.csv -------------------------------------------------------------------------------- /datasets/phase_1/co_multi_label.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/datasets/phase_1/co_multi_label.csv -------------------------------------------------------------------------------- /datasets/phase_1/text_data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/datasets/phase_1/text_data.csv -------------------------------------------------------------------------------- /datasets/testing/test_data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/datasets/testing/test_data.csv -------------------------------------------------------------------------------- /datasets/v6framework_230831.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/datasets/v6framework_230831.csv -------------------------------------------------------------------------------- /docker_README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docker_README.md -------------------------------------------------------------------------------- /docker_data/data_in/file_01.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docker_data/data_in/file_01.json -------------------------------------------------------------------------------- /docker_data/data_in/file_02.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docker_data/data_in/file_02.json -------------------------------------------------------------------------------- /docker_data/data_out/file_01.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docker_data/data_out/file_01.json -------------------------------------------------------------------------------- /docker_run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docker_run.py -------------------------------------------------------------------------------- /docs/about.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/about.md -------------------------------------------------------------------------------- /docs/create_docs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/create_docs.py -------------------------------------------------------------------------------- /docs/getting started/install.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/getting started/install.md -------------------------------------------------------------------------------- /docs/getting started/package.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/getting started/package.md -------------------------------------------------------------------------------- /docs/getting started/training_new_model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/getting started/training_new_model.md -------------------------------------------------------------------------------- /docs/getting started/using_trained_model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/getting started/using_trained_model.md -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/index.md -------------------------------------------------------------------------------- /docs/main.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/main.css -------------------------------------------------------------------------------- /docs/reference/API/API.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/API/API.md -------------------------------------------------------------------------------- /docs/reference/API/quick_API.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/API/quick_API.md -------------------------------------------------------------------------------- /docs/reference/API/slow_API.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/API/slow_API.md -------------------------------------------------------------------------------- /docs/reference/Docker/docker_README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/Docker/docker_README.md -------------------------------------------------------------------------------- /docs/reference/pxtextmining/factories/factory_data_load_and_split.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/pxtextmining/factories/factory_data_load_and_split.md -------------------------------------------------------------------------------- /docs/reference/pxtextmining/factories/factory_model_performance.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/pxtextmining/factories/factory_model_performance.md -------------------------------------------------------------------------------- /docs/reference/pxtextmining/factories/factory_pipeline.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/pxtextmining/factories/factory_pipeline.md -------------------------------------------------------------------------------- /docs/reference/pxtextmining/factories/factory_predict_unlabelled_text.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/pxtextmining/factories/factory_predict_unlabelled_text.md -------------------------------------------------------------------------------- /docs/reference/pxtextmining/factories/factory_write_results.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/pxtextmining/factories/factory_write_results.md -------------------------------------------------------------------------------- /docs/reference/pxtextmining/helpers/text_preprocessor.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/pxtextmining/helpers/text_preprocessor.md -------------------------------------------------------------------------------- /docs/reference/pxtextmining/pipelines/multilabel_pipeline.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/pxtextmining/pipelines/multilabel_pipeline.md -------------------------------------------------------------------------------- /docs/reference/pxtextmining/pipelines/sentiment_pipeline.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/docs/reference/pxtextmining/pipelines/sentiment_pipeline.md -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/mkdocs.yml -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/poetry.lock -------------------------------------------------------------------------------- /pxtextmining/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pxtextmining/factories/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pxtextmining/factories/factory_data_load_and_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/pxtextmining/factories/factory_data_load_and_split.py -------------------------------------------------------------------------------- /pxtextmining/factories/factory_model_performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/pxtextmining/factories/factory_model_performance.py -------------------------------------------------------------------------------- /pxtextmining/factories/factory_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/pxtextmining/factories/factory_pipeline.py -------------------------------------------------------------------------------- /pxtextmining/factories/factory_predict_unlabelled_text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/pxtextmining/factories/factory_predict_unlabelled_text.py -------------------------------------------------------------------------------- /pxtextmining/factories/factory_write_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/pxtextmining/factories/factory_write_results.py -------------------------------------------------------------------------------- /pxtextmining/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pxtextmining/helpers/text_preprocessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/pxtextmining/helpers/text_preprocessor.py -------------------------------------------------------------------------------- /pxtextmining/params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/pxtextmining/params.py -------------------------------------------------------------------------------- /pxtextmining/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pxtextmining/pipelines/multilabel_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/pxtextmining/pipelines/multilabel_pipeline.py -------------------------------------------------------------------------------- /pxtextmining/pipelines/sentiment_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/pxtextmining/pipelines/sentiment_pipeline.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/pyproject.toml -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/setup.py -------------------------------------------------------------------------------- /test_multilabel/dummy_metrics.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/test_multilabel/dummy_metrics.txt -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/tests/test_api.py -------------------------------------------------------------------------------- /tests/test_data_load_and_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/tests/test_data_load_and_split.py -------------------------------------------------------------------------------- /tests/test_docker_run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/tests/test_docker_run.py -------------------------------------------------------------------------------- /tests/test_factory_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/tests/test_factory_pipeline.py -------------------------------------------------------------------------------- /tests/test_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/tests/test_helpers.py -------------------------------------------------------------------------------- /tests/test_model_performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/tests/test_model_performance.py -------------------------------------------------------------------------------- /tests/test_multilabel_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/tests/test_multilabel_pipeline.py -------------------------------------------------------------------------------- /tests/test_predict_unlabelled_text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/tests/test_predict_unlabelled_text.py -------------------------------------------------------------------------------- /tests/test_sentiment_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/tests/test_sentiment_pipeline.py -------------------------------------------------------------------------------- /tests/test_write_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Strategy-Unit/pxtextmining/HEAD/tests/test_write_results.py --------------------------------------------------------------------------------