├── .github └── stale.yml ├── .gitignore ├── LICENSE ├── README.md ├── TextAnalysis.Rproj ├── TextAnalysis.wpr ├── data ├── fake_news_data │ └── liar_dataset │ │ ├── README.md │ │ ├── test.tsv │ │ ├── train.tsv │ │ └── valid.tsv ├── kaggle_spooky_authors │ ├── sample_submission.csv │ ├── test.csv │ └── train.csv ├── novelwordsonly.txt ├── plainText │ ├── austen.txt │ └── melville.txt ├── readme.md └── text_data_for_analysis.txt ├── figures ├── Rplot-wordcloud01.png └── readme.md ├── resources └── readme.md └── scripts ├── .gitignore ├── R ├── HP_preprocess_01.R ├── austen_text_analysis.R ├── dispersion_plots.R ├── dracula_text_analysis.R ├── expr_kaggle-reddit_EDA.R ├── expr_kaggle-reddit_text-analysis.R ├── initialScript-1.R ├── initialScript.R ├── mobydick_novel_text_analysis.R ├── structural_topic_modeling_00.R ├── text_analysis_example01.R ├── text_analysis_example02.R ├── token_distribution_analysis.R ├── topic_modeling_00.R ├── topic_modeling_01.R ├── wuthering_heights_sentiment_analysis.R └── wuthering_heights_text_analysis.R ├── python ├── .gitignore ├── 00_topic_modelling_fundamentals.ipynb ├── 00_topic_modelling_theoretical_concepts.ipynb ├── 01_topic_modelling_fundamentals.ipynb ├── Tutorial1-An introduction to NLP with SpaCy.ipynb ├── check_pkgs.py ├── extract_table_from_pdf.py ├── extract_text_data_from_pdf.py ├── extract_text_data_from_pdf_2.py ├── extract_text_data_from_pdf_3.py ├── extract_text_data_from_textfiles.py ├── func_text_preprocess.py ├── kaggle_hotel_review_rating.py ├── kaggle_spooky_authors_ml_model.ipynb └── learn_text_analysis.py └── readme.md /.github/stale.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/.github/stale.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/README.md -------------------------------------------------------------------------------- /TextAnalysis.Rproj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/TextAnalysis.Rproj -------------------------------------------------------------------------------- /TextAnalysis.wpr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/TextAnalysis.wpr -------------------------------------------------------------------------------- /data/fake_news_data/liar_dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/data/fake_news_data/liar_dataset/README.md -------------------------------------------------------------------------------- /data/fake_news_data/liar_dataset/test.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/data/fake_news_data/liar_dataset/test.tsv -------------------------------------------------------------------------------- /data/fake_news_data/liar_dataset/train.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/data/fake_news_data/liar_dataset/train.tsv -------------------------------------------------------------------------------- /data/fake_news_data/liar_dataset/valid.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/data/fake_news_data/liar_dataset/valid.tsv -------------------------------------------------------------------------------- /data/kaggle_spooky_authors/sample_submission.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/data/kaggle_spooky_authors/sample_submission.csv -------------------------------------------------------------------------------- /data/kaggle_spooky_authors/test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/data/kaggle_spooky_authors/test.csv -------------------------------------------------------------------------------- /data/kaggle_spooky_authors/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/data/kaggle_spooky_authors/train.csv -------------------------------------------------------------------------------- /data/novelwordsonly.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/data/novelwordsonly.txt -------------------------------------------------------------------------------- /data/plainText/austen.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/data/plainText/austen.txt -------------------------------------------------------------------------------- /data/plainText/melville.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/data/plainText/melville.txt -------------------------------------------------------------------------------- /data/readme.md: -------------------------------------------------------------------------------- 1 | data files for text analysis are kept here 2 | -------------------------------------------------------------------------------- /data/text_data_for_analysis.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/data/text_data_for_analysis.txt -------------------------------------------------------------------------------- /figures/Rplot-wordcloud01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/figures/Rplot-wordcloud01.png -------------------------------------------------------------------------------- /figures/readme.md: -------------------------------------------------------------------------------- 1 | All plots/figures live here 2 | -------------------------------------------------------------------------------- /resources/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/resources/readme.md -------------------------------------------------------------------------------- /scripts/.gitignore: -------------------------------------------------------------------------------- 1 | /.ipynb_checkpoints/ 2 | -------------------------------------------------------------------------------- /scripts/R/HP_preprocess_01.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/HP_preprocess_01.R -------------------------------------------------------------------------------- /scripts/R/austen_text_analysis.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/austen_text_analysis.R -------------------------------------------------------------------------------- /scripts/R/dispersion_plots.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/dispersion_plots.R -------------------------------------------------------------------------------- /scripts/R/dracula_text_analysis.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/dracula_text_analysis.R -------------------------------------------------------------------------------- /scripts/R/expr_kaggle-reddit_EDA.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/expr_kaggle-reddit_EDA.R -------------------------------------------------------------------------------- /scripts/R/expr_kaggle-reddit_text-analysis.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/expr_kaggle-reddit_text-analysis.R -------------------------------------------------------------------------------- /scripts/R/initialScript-1.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/initialScript-1.R -------------------------------------------------------------------------------- /scripts/R/initialScript.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/initialScript.R -------------------------------------------------------------------------------- /scripts/R/mobydick_novel_text_analysis.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/mobydick_novel_text_analysis.R -------------------------------------------------------------------------------- /scripts/R/structural_topic_modeling_00.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/structural_topic_modeling_00.R -------------------------------------------------------------------------------- /scripts/R/text_analysis_example01.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/text_analysis_example01.R -------------------------------------------------------------------------------- /scripts/R/text_analysis_example02.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/text_analysis_example02.R -------------------------------------------------------------------------------- /scripts/R/token_distribution_analysis.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/token_distribution_analysis.R -------------------------------------------------------------------------------- /scripts/R/topic_modeling_00.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/topic_modeling_00.R -------------------------------------------------------------------------------- /scripts/R/topic_modeling_01.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/topic_modeling_01.R -------------------------------------------------------------------------------- /scripts/R/wuthering_heights_sentiment_analysis.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/wuthering_heights_sentiment_analysis.R -------------------------------------------------------------------------------- /scripts/R/wuthering_heights_text_analysis.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/R/wuthering_heights_text_analysis.R -------------------------------------------------------------------------------- /scripts/python/.gitignore: -------------------------------------------------------------------------------- 1 | /.ipynb_checkpoints/ 2 | -------------------------------------------------------------------------------- /scripts/python/00_topic_modelling_fundamentals.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/00_topic_modelling_fundamentals.ipynb -------------------------------------------------------------------------------- /scripts/python/00_topic_modelling_theoretical_concepts.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/00_topic_modelling_theoretical_concepts.ipynb -------------------------------------------------------------------------------- /scripts/python/01_topic_modelling_fundamentals.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/01_topic_modelling_fundamentals.ipynb -------------------------------------------------------------------------------- /scripts/python/Tutorial1-An introduction to NLP with SpaCy.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/Tutorial1-An introduction to NLP with SpaCy.ipynb -------------------------------------------------------------------------------- /scripts/python/check_pkgs.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/python/extract_table_from_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/extract_table_from_pdf.py -------------------------------------------------------------------------------- /scripts/python/extract_text_data_from_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/extract_text_data_from_pdf.py -------------------------------------------------------------------------------- /scripts/python/extract_text_data_from_pdf_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/extract_text_data_from_pdf_2.py -------------------------------------------------------------------------------- /scripts/python/extract_text_data_from_pdf_3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/extract_text_data_from_pdf_3.py -------------------------------------------------------------------------------- /scripts/python/extract_text_data_from_textfiles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/extract_text_data_from_textfiles.py -------------------------------------------------------------------------------- /scripts/python/func_text_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/func_text_preprocess.py -------------------------------------------------------------------------------- /scripts/python/kaggle_hotel_review_rating.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/kaggle_hotel_review_rating.py -------------------------------------------------------------------------------- /scripts/python/kaggle_spooky_authors_ml_model.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/kaggle_spooky_authors_ml_model.ipynb -------------------------------------------------------------------------------- /scripts/python/learn_text_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duttashi/text-analysis/HEAD/scripts/python/learn_text_analysis.py -------------------------------------------------------------------------------- /scripts/readme.md: -------------------------------------------------------------------------------- 1 | All code scripts live here 2 | --------------------------------------------------------------------------------