├── .gitignore ├── README.md ├── data └── cleaned_data │ ├── authors.txt │ ├── books.txt │ ├── cleaned_content.txt │ ├── freq_words_removed_content.txt │ └── topics.txt ├── plots ├── dbscan-mds.png ├── hier_clustering.png ├── lined_2.png └── similarity_matrix.png └── src ├── clustering_functions.py ├── main.py ├── plot.py ├── plot_correlation_matrix.py ├── read_and_clean_documents.py └── text_processing.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/README.md -------------------------------------------------------------------------------- /data/cleaned_data/authors.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/data/cleaned_data/authors.txt -------------------------------------------------------------------------------- /data/cleaned_data/books.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/data/cleaned_data/books.txt -------------------------------------------------------------------------------- /data/cleaned_data/cleaned_content.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/data/cleaned_data/cleaned_content.txt -------------------------------------------------------------------------------- /data/cleaned_data/freq_words_removed_content.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/data/cleaned_data/freq_words_removed_content.txt -------------------------------------------------------------------------------- /data/cleaned_data/topics.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/data/cleaned_data/topics.txt -------------------------------------------------------------------------------- /plots/dbscan-mds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/plots/dbscan-mds.png -------------------------------------------------------------------------------- /plots/hier_clustering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/plots/hier_clustering.png -------------------------------------------------------------------------------- /plots/lined_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/plots/lined_2.png -------------------------------------------------------------------------------- /plots/similarity_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/plots/similarity_matrix.png -------------------------------------------------------------------------------- /src/clustering_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/src/clustering_functions.py -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/src/main.py -------------------------------------------------------------------------------- /src/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/src/plot.py -------------------------------------------------------------------------------- /src/plot_correlation_matrix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/src/plot_correlation_matrix.py -------------------------------------------------------------------------------- /src/read_and_clean_documents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/src/read_and_clean_documents.py -------------------------------------------------------------------------------- /src/text_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utkuozbulak/unsupervised-learning-document-clustering/HEAD/src/text_processing.py --------------------------------------------------------------------------------