├── .gitignore ├── README.md ├── topics ├── applications_named_entity_recognition │ └── named_entity_recognition.pdf ├── applications_postagger │ └── hmm_crf_postagger.pdf ├── applications_space_correction │ ├── crfbased_korean_space_correction.pdf │ └── soyspacing_heuristic_korean_soace_correction.pdf ├── python_basic │ ├── Appendix0_scraping_naver_movie.ipynb │ ├── Appendix1_handling_excel-toy.ipynb │ ├── Python0_Dict_JSON_IO.ipynb │ ├── Python1_pickling.ipynb │ ├── Python2_list_comprehension_and_iteration.ipynb │ ├── Python3_slice_and_sorting.ipynb │ ├── Python4_yield_len_iter.ipynb │ ├── Python5_three_way_for_word_counting.ipynb │ ├── Python6_zip.ipynb │ ├── google_logo.png │ ├── lalaland_1.png │ ├── lalaland_2.png │ ├── texts.txt │ ├── toy.xls │ └── toy.xlsx ├── topic10_string_distance │ └── string_distance.pdf ├── topic11_graph │ ├── graph_ranking.pdf │ └── shortestpath_word_segmentation.pdf ├── topic12_cnn │ ├── cnn.pdf │ └── cnn_nlp.pdf ├── topic13_rnn │ └── rnn_and_attention.pdf ├── topic1_from_text_to_vector │ ├── from_text_to_vector.pdf │ ├── ngram.pdf │ └── tutorials │ │ ├── From_text_to_sparse_matrix.ipynb │ │ ├── KoNLPy.ipynb │ │ ├── ngram.ipynb │ │ └── userdic.txt ├── topic2_word_tokenizing │ ├── dictionary_based_pos_tagger.pdf │ ├── lemmatizer.pdf │ ├── noun_extractor.pdf │ └── word_extractor_unsupervised_tokenizer.pdf ├── topic3_classification │ ├── classifiers.pdf │ ├── from_logistic_to_crf.pdf │ └── logistic_and_lasso_regression.pdf ├── topic4_embedding │ ├── appendix_word_representation_using_pmi_svd.pdf │ ├── sparse_coding_and_nmf.pdf │ ├── topic_modeling.pdf │ ├── vector_visualization.pdf │ └── word_document_embedding.pdf ├── topic5_keywords_and_related_word_analysis │ └── cooccurrence_based_relations.pdf ├── topic6_clustering │ └── document_clustering.pdf ├── topic7_nearest_neighbors │ └── vector_indexing.pdf ├── topic_a1_visualization │ └── plotting_in_python.pdf └── tutorials_builing_your_konlpy │ └── building_your_KoNLPy.pdf └── tutorial_data ├── .gitignore ├── 2016-10-20_article_all_normed.zip ├── bow_for_keywords.pkl ├── data_loader.py └── lalaland_comments.zip /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/README.md -------------------------------------------------------------------------------- /topics/applications_named_entity_recognition/named_entity_recognition.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/applications_named_entity_recognition/named_entity_recognition.pdf -------------------------------------------------------------------------------- /topics/applications_postagger/hmm_crf_postagger.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/applications_postagger/hmm_crf_postagger.pdf -------------------------------------------------------------------------------- /topics/applications_space_correction/crfbased_korean_space_correction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/applications_space_correction/crfbased_korean_space_correction.pdf -------------------------------------------------------------------------------- /topics/applications_space_correction/soyspacing_heuristic_korean_soace_correction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/applications_space_correction/soyspacing_heuristic_korean_soace_correction.pdf -------------------------------------------------------------------------------- /topics/python_basic/Appendix0_scraping_naver_movie.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/Appendix0_scraping_naver_movie.ipynb -------------------------------------------------------------------------------- /topics/python_basic/Appendix1_handling_excel-toy.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/Appendix1_handling_excel-toy.ipynb -------------------------------------------------------------------------------- /topics/python_basic/Python0_Dict_JSON_IO.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/Python0_Dict_JSON_IO.ipynb -------------------------------------------------------------------------------- /topics/python_basic/Python1_pickling.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/Python1_pickling.ipynb -------------------------------------------------------------------------------- /topics/python_basic/Python2_list_comprehension_and_iteration.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/Python2_list_comprehension_and_iteration.ipynb -------------------------------------------------------------------------------- /topics/python_basic/Python3_slice_and_sorting.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/Python3_slice_and_sorting.ipynb -------------------------------------------------------------------------------- /topics/python_basic/Python4_yield_len_iter.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/Python4_yield_len_iter.ipynb -------------------------------------------------------------------------------- /topics/python_basic/Python5_three_way_for_word_counting.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/Python5_three_way_for_word_counting.ipynb -------------------------------------------------------------------------------- /topics/python_basic/Python6_zip.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/Python6_zip.ipynb -------------------------------------------------------------------------------- /topics/python_basic/google_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/google_logo.png -------------------------------------------------------------------------------- /topics/python_basic/lalaland_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/lalaland_1.png -------------------------------------------------------------------------------- /topics/python_basic/lalaland_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/lalaland_2.png -------------------------------------------------------------------------------- /topics/python_basic/texts.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/texts.txt -------------------------------------------------------------------------------- /topics/python_basic/toy.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/toy.xls -------------------------------------------------------------------------------- /topics/python_basic/toy.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/python_basic/toy.xlsx -------------------------------------------------------------------------------- /topics/topic10_string_distance/string_distance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic10_string_distance/string_distance.pdf -------------------------------------------------------------------------------- /topics/topic11_graph/graph_ranking.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic11_graph/graph_ranking.pdf -------------------------------------------------------------------------------- /topics/topic11_graph/shortestpath_word_segmentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic11_graph/shortestpath_word_segmentation.pdf -------------------------------------------------------------------------------- /topics/topic12_cnn/cnn.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic12_cnn/cnn.pdf -------------------------------------------------------------------------------- /topics/topic12_cnn/cnn_nlp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic12_cnn/cnn_nlp.pdf -------------------------------------------------------------------------------- /topics/topic13_rnn/rnn_and_attention.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic13_rnn/rnn_and_attention.pdf -------------------------------------------------------------------------------- /topics/topic1_from_text_to_vector/from_text_to_vector.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic1_from_text_to_vector/from_text_to_vector.pdf -------------------------------------------------------------------------------- /topics/topic1_from_text_to_vector/ngram.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic1_from_text_to_vector/ngram.pdf -------------------------------------------------------------------------------- /topics/topic1_from_text_to_vector/tutorials/From_text_to_sparse_matrix.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic1_from_text_to_vector/tutorials/From_text_to_sparse_matrix.ipynb -------------------------------------------------------------------------------- /topics/topic1_from_text_to_vector/tutorials/KoNLPy.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic1_from_text_to_vector/tutorials/KoNLPy.ipynb -------------------------------------------------------------------------------- /topics/topic1_from_text_to_vector/tutorials/ngram.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic1_from_text_to_vector/tutorials/ngram.ipynb -------------------------------------------------------------------------------- /topics/topic1_from_text_to_vector/tutorials/userdic.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic1_from_text_to_vector/tutorials/userdic.txt -------------------------------------------------------------------------------- /topics/topic2_word_tokenizing/dictionary_based_pos_tagger.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic2_word_tokenizing/dictionary_based_pos_tagger.pdf -------------------------------------------------------------------------------- /topics/topic2_word_tokenizing/lemmatizer.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic2_word_tokenizing/lemmatizer.pdf -------------------------------------------------------------------------------- /topics/topic2_word_tokenizing/noun_extractor.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic2_word_tokenizing/noun_extractor.pdf -------------------------------------------------------------------------------- /topics/topic2_word_tokenizing/word_extractor_unsupervised_tokenizer.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic2_word_tokenizing/word_extractor_unsupervised_tokenizer.pdf -------------------------------------------------------------------------------- /topics/topic3_classification/classifiers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic3_classification/classifiers.pdf -------------------------------------------------------------------------------- /topics/topic3_classification/from_logistic_to_crf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic3_classification/from_logistic_to_crf.pdf -------------------------------------------------------------------------------- /topics/topic3_classification/logistic_and_lasso_regression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic3_classification/logistic_and_lasso_regression.pdf -------------------------------------------------------------------------------- /topics/topic4_embedding/appendix_word_representation_using_pmi_svd.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic4_embedding/appendix_word_representation_using_pmi_svd.pdf -------------------------------------------------------------------------------- /topics/topic4_embedding/sparse_coding_and_nmf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic4_embedding/sparse_coding_and_nmf.pdf -------------------------------------------------------------------------------- /topics/topic4_embedding/topic_modeling.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic4_embedding/topic_modeling.pdf -------------------------------------------------------------------------------- /topics/topic4_embedding/vector_visualization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic4_embedding/vector_visualization.pdf -------------------------------------------------------------------------------- /topics/topic4_embedding/word_document_embedding.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic4_embedding/word_document_embedding.pdf -------------------------------------------------------------------------------- /topics/topic5_keywords_and_related_word_analysis/cooccurrence_based_relations.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic5_keywords_and_related_word_analysis/cooccurrence_based_relations.pdf -------------------------------------------------------------------------------- /topics/topic6_clustering/document_clustering.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic6_clustering/document_clustering.pdf -------------------------------------------------------------------------------- /topics/topic7_nearest_neighbors/vector_indexing.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic7_nearest_neighbors/vector_indexing.pdf -------------------------------------------------------------------------------- /topics/topic_a1_visualization/plotting_in_python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/topic_a1_visualization/plotting_in_python.pdf -------------------------------------------------------------------------------- /topics/tutorials_builing_your_konlpy/building_your_KoNLPy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/topics/tutorials_builing_your_konlpy/building_your_KoNLPy.pdf -------------------------------------------------------------------------------- /tutorial_data/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/tutorial_data/.gitignore -------------------------------------------------------------------------------- /tutorial_data/2016-10-20_article_all_normed.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/tutorial_data/2016-10-20_article_all_normed.zip -------------------------------------------------------------------------------- /tutorial_data/bow_for_keywords.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/tutorial_data/bow_for_keywords.pkl -------------------------------------------------------------------------------- /tutorial_data/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/tutorial_data/data_loader.py -------------------------------------------------------------------------------- /tutorial_data/lalaland_comments.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lovit/textmining-tutorial/HEAD/tutorial_data/lalaland_comments.zip --------------------------------------------------------------------------------