├── .DS_Store ├── .gitignore ├── Algorithm ├── .ipynb_checkpoints │ ├── HelloCoding_Algo-checkpoint.ipynb │ └── algo_basic-checkpoint.ipynb ├── 06-01_calc.py ├── Algo_basic.py ├── HelloCoding_Algo.ipynb ├── IsPrime.py ├── algo_basic.ipynb ├── binary_search_1.py ├── euler_prob1.py ├── graph_algo.py └── selection_sort_2.py ├── Chatbot ├── .ipynb_checkpoints │ ├── qa_chatbot-checkpoint.ipynb │ └── seq2seq-checkpoint.ipynb ├── Slack_Bot │ ├── .Rhistory │ ├── __init__.py │ ├── __pycache__ │ │ └── mcbot_chat.cpython-35.pyc │ ├── data │ │ ├── 29일간의실종.txt │ │ ├── desktop.ini │ │ ├── 게리온의무리들.txt │ │ ├── 게임의종말.txt │ │ ├── 경찰청사람들.txt │ │ ├── 사랑과욕망의덫.txt │ │ ├── 연산군.txt │ │ ├── 욕망이타는숲.txt │ │ ├── 위대한개츠비.txt │ │ └── 황야의이리.txt │ ├── lstm_bot.py │ ├── markov-toji.json │ ├── markov_chain_bot.py │ ├── mcbot_chat.py │ ├── modubot.py │ ├── print_bot_id.py │ ├── toji.model │ ├── toji.wakati │ └── toji2.json ├── __init__.py ├── __pycache__ │ └── helpers.cpython-36.pyc ├── helpers.py ├── seq2seq.ipynb └── seq2seq_tut.py ├── Dataset └── dataset.md ├── ETC ├── Lec1.txt ├── Lec4.txt └── Subtitle_tools.ipynb ├── HTML └── code_academy.html ├── Kaggle ├── Quora │ └── Quora_shin.ipynb └── What_Cooking │ └── Cooking.ipynb ├── Keras_Basic ├── .ipynb_checkpoints │ ├── Keras_tutorial_imdb_text_classification-checkpoint.ipynb │ └── Keras_tutorial_text_generation-checkpoint.ipynb ├── Keras_Cheat_Sheet_Python.pdf ├── Keras_basic_fin.py ├── Keras_classification_basic.py ├── Keras_fine_tuning_basic.py ├── Keras_tutorial_imdb_text_classification.ipynb └── Keras_tutorial_text_generation.ipynb ├── Machine_Comprehension ├── Attention_Keras │ ├── .Rhistory │ ├── Attention_Keras_QA_Model.py │ ├── CNNQA_architecture.json │ ├── CNNQA_weights.h5.7z │ ├── Glove.py │ ├── KerasQA.ods │ └── embedding_data.h5 ├── DMN_QA │ ├── DynamicMemoryNetwork.pdf │ ├── bAbi.pdf │ ├── dataset │ │ └── babi_tasks_1-20_v1-2.tar.gz │ ├── image │ │ ├── algo_process1.png │ │ ├── algo_process2.png │ │ ├── algo_process3.png │ │ └── babi_dataset.png │ └── qa_chatbot.ipynb └── Readme_MC.md ├── Math_Stat ├── .Rhistory ├── .ipynb_checkpoints │ ├── ML_Basic_Siraj Raval-checkpoint.ipynb │ └── support_vector_machine_lesson-checkpoint.ipynb ├── ML_Basic_Siraj Raval.ipynb ├── Readme.md ├── data.csv └── support_vector_machine_lesson.ipynb ├── Natural Language Generation └── lstm_keras_generation.py ├── Python ├── .ipynb_checkpoints │ ├── Python_Data_Science-Matplotlib-checkpoint.ipynb │ ├── Python_Data_science_toolbox_part1-checkpoint.ipynb │ └── Python_Review-checkpoint.ipynb ├── Cheat_Sheet.py ├── Decorator.py ├── OOP.ipynb ├── Python_Data_Science-Matplotlib.ipynb ├── Python_Data_science_toolbox_part1.ipynb ├── Python_Review.ipynb ├── Visualization │ ├── .ipynb_checkpoints │ │ └── Bokeh-checkpoint.ipynb │ └── Bokeh.ipynb └── attribute.py ├── Quora_insincere ├── .gitignore ├── README.md ├── jupyter_examples │ ├── Data_Prepro.ipynb │ ├── Modeling.ipynb │ ├── data_preprocessing.py │ ├── lstm_kernel_shin.ipynb │ ├── lstm_kernel_simple.ipynb │ ├── test_kernel1.ipynb │ └── test_kernel_ryan.ipynb └── lstm.py ├── Readme.md ├── Tensorflow ├── .gitignore ├── 04_word2vec_eager.py ├── Chatbot_Attention.ipynb ├── TF_README.md ├── nmt_with_attention.ipynb ├── nmt_with_attention_chatbot_kor.ipynb ├── standford_example │ ├── 02_lazy_loading.py │ ├── 02_placeholder.py │ ├── 02_simple_tf.py │ ├── 02_variables.py │ ├── 03_linreg_dataset.py │ ├── 03_linreg_placeholder.py │ ├── 03_linreg_starter.py │ ├── 03_logreg.py │ ├── 03_logreg_placeholder.py │ ├── 03_logreg_starter.py │ ├── 
04_linreg_eager.py │ ├── 04_linreg_eager_starter.py │ ├── 04_word2vec.py │ ├── 04_word2vec_eager.py │ ├── 04_word2vec_eager_starter.py │ ├── 04_word2vec_visualize.py │ ├── 05_randomization.py │ ├── 05_variable_sharing.py │ ├── 07_convnet_layers.py │ ├── 07_convnet_mnist.py │ ├── 07_convnet_mnist_starter.py │ ├── 07_run_kernels.py │ ├── 11_char_rnn.py │ ├── kernels.py │ └── word2vec_utils.py └── tf_eagar(Define by run) 튜토리얼.ipynb ├── Text_Classification ├── .DS_Store ├── .ipynb_checkpoints │ ├── Bag of Words Meets Bags of Popcorn-checkpoint.ipynb │ ├── bagofwords_text_classficiation_kaggle-checkpoint.ipynb │ ├── cnn_textclassification_keras-checkpoint.ipynb │ └── seq2seq_keras-checkpoint.ipynb ├── To_Do │ ├── cnn_keras_text_class_kor.py │ ├── hierachical_attention_keras_text_class_eng.py │ ├── lstm_keras_text_class_eng.py │ └── rnn_attention_keras_text_class_eng .py ├── bagofwords_text_classficiation_kaggle.ipynb ├── char_text_classification_keras.py ├── cnn_keras_text_class_imdb.py ├── cnn_keras_text_class_imdb2.py ├── cnn_keras_text_class_imdb2_korean.py ├── cnn_keras_text_class_kaggle_eng.py ├── cnn_pytorch_text_class_kaggle_eng.py ├── nets │ ├── __pycache__ │ │ └── text_cnn.cpython-35.pyc │ └── text_cnn.py └── seq2seq_keras.ipynb ├── VAE └── vae_sample.py ├── dataset └── .DS_Store └── pytorch_basic ├── .ipynb_checkpoints ├── 10.pytorch_rnn-checkpoint.ipynb ├── Pytorch_basic-checkpoint.ipynb └── sec 6. Linear regression wih Python-checkpoint.ipynb ├── 10.pytorch_rnn.ipynb ├── Pytorch Seq2Seq.ipynb ├── Pytorch.ipynb ├── Pytorch_basic.ipynb ├── Pytorch_mnist.ipynb ├── Start_Pytorch.ipynb ├── cnn_cifar10_pytorch.py ├── cnn_text_pytorch.py ├── pytorch_basic.py ├── pytorch_nlp.py ├── pytorch_nlp2.py ├── pytorch_nlp3.py ├── pytorch_seq2seq(LSTM).py ├── sec 6. 
Linear regression wih Python.ipynb ├── seq2seq_models.py └── text_loader.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### JetBrains template 3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 5 | 6 | # User-specific stuff: 7 | .idea/**/workspace.xml 8 | .idea/**/tasks.xml 9 | .idea/dictionaries 10 | 11 | # Sensitive or high-churn files: 12 | .idea/**/dataSources/ 13 | .idea/**/dataSources.ids 14 | .idea/**/dataSources.xml 15 | .idea/**/dataSources.local.xml 16 | .idea/**/sqlDataSources.xml 17 | .idea/**/dynamic.xml 18 | .idea/**/uiDesigner.xml 19 | 20 | # Gradle: 21 | .idea/**/gradle.xml 22 | .idea/**/libraries 23 | 24 | # CMake 25 | cmake-build-debug/ 26 | 27 | # Mongo Explorer plugin: 28 | .idea/**/mongoSettings.xml 29 | 30 | ## File-based project format: 31 | *.iws 32 | 33 | ## Plugin-specific files: 34 | 35 | # IntelliJ 36 | out/ 37 | 38 | # mpeltonen/sbt-idea plugin 39 | .idea_modules/ 40 | 41 | # JIRA plugin 42 | atlassian-ide-plugin.xml 43 | 44 | # Cursive Clojure plugin 45 | .idea/replstate.xml 46 | 47 | # Crashlytics plugin (for Android Studio and IntelliJ) 48 | com_crashlytics_export_strings.xml 49 | crashlytics.properties 50 | crashlytics-build.properties 51 | fabric.properties 52 | ### Python template 53 | # Byte-compiled / optimized / DLL files 54 | __pycache__/ 55 | *.py[cod] 56 | *$py.class 57 | 58 | # C extensions 59 | *.so 60 | 61 | # Distribution / packaging 62 | .Python 63 | build/ 64 | develop-eggs/ 65 | dist/ 66 | downloads/ 67 | eggs/ 68 | .eggs/ 69 | lib/ 70 | lib64/ 71 | parts/ 72 | sdist/ 73 | var/ 74 | wheels/ 75 | *.egg-info/ 76 | .installed.cfg 77 | *.egg 78 | MANIFEST 79 | 80 | # PyInstaller 81 | # Usually these files are written by a python script from a template 82 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
83 | *.manifest 84 | *.spec 85 | 86 | # Installer logs 87 | pip-log.txt 88 | pip-delete-this-directory.txt 89 | 90 | # Unit test / coverage reports 91 | htmlcov/ 92 | .tox/ 93 | .coverage 94 | .coverage.* 95 | .cache 96 | nosetests.xml 97 | coverage.xml 98 | *.cover 99 | .hypothesis/ 100 | 101 | # Translations 102 | *.mo 103 | *.pot 104 | 105 | # Django stuff: 106 | *.log 107 | .static_storage/ 108 | .media/ 109 | local_settings.py 110 | 111 | # Flask stuff: 112 | instance/ 113 | .webassets-cache 114 | 115 | # Scrapy stuff: 116 | .scrapy 117 | 118 | # Sphinx documentation 119 | docs/_build/ 120 | 121 | # PyBuilder 122 | target/ 123 | 124 | # Jupyter Notebook 125 | .ipynb_checkpoints 126 | 127 | # pyenv 128 | .python-version 129 | 130 | # celery beat schedule file 131 | celerybeat-schedule 132 | 133 | # SageMath parsed files 134 | *.sage.py 135 | 136 | # Environments 137 | .env 138 | .venv 139 | env/ 140 | venv/ 141 | ENV/ 142 | env.bak/ 143 | venv.bak/ 144 | 145 | # Spyder project settings 146 | .spyderproject 147 | .spyproject 148 | 149 | # Rope project settings 150 | .ropeproject 151 | 152 | # mkdocs documentation 153 | /site 154 | 155 | # mypy 156 | .mypy_cache/ 157 | ### macOS template 158 | # General 159 | .DS_Store 160 | .AppleDouble 161 | .LSOverride 162 | 163 | # Icon must end with two \r 164 | Icon 165 | 166 | # Thumbnails 167 | ._* 168 | 169 | # Files that might appear in the root of a volume 170 | .DocumentRevisions-V100 171 | .fseventsd 172 | .Spotlight-V100 173 | .TemporaryItems 174 | .Trashes 175 | .VolumeIcon.icns 176 | .com.apple.timemachine.donotpresent 177 | 178 | # Directories potentially created on remote AFP share 179 | .AppleDB 180 | .AppleDesktop 181 | Network Trash Folder 182 | Temporary Items 183 | .apdisk 184 | 185 | .idea/ 186 | data_in/ 187 | data_out/* 188 | checkpoint/ 189 | logs/ 190 | OLD/ 191 | practice/ 192 | scala_data_pre/ 193 | target/ 194 | .vscode/ 195 | .ipynb_checkpoints/ 196 | .DS_Store 197 | .DS_Store* 198 | my_test_model/ 199 | result/ 200 | blog_origin.pkl 201 | -------------------------------------------------------------------------------- /Algorithm/.ipynb_checkpoints/HelloCoding_Algo-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1.원래 저장위치에서 문자열을 역순으로 변환하기\n", 8 | "\n", 9 | "문자열 역순 변환\n", 10 | "- 문자열의 길이 알아내기\n", 11 | "- 문자열의 첫번째문자와 마지막 문자 교환\n", 12 | "- 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n", 13 | "\n", 14 | "위의 절차를 반복 (유니코드 한글에 대한 주의)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "#문자열 뒤집기 알고리즘\n", 26 | "\n", 27 | "#Sample String\n", 28 | "Sam_string = 'ABCD'" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "'DCBA'" 42 | ] 43 | }, 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "#Python은 심플하게 끝남\n", 51 | "\n", 52 | "def reverseString(str):\n", 53 | " return str[::-1]\n", 54 | "\n", 55 | "reverseString(Sam_string)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# 스택 (접착지 메모)\n", 63 | "\n", 64 | "- 스택에는 푸시와 팝이라는 두가지 연산\n", 65 | "- 모든 함수 호출은 호출 스택을 사용\n", 66 | "- 호출 스택은 너무 켜져 메모리를 크게 소모 할 수 도 있음" 67 | ] 68 | }, 69 | { 
70 | "cell_type": "code", 71 | "execution_count": 9, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "'D'" 80 | ] 81 | }, 82 | "execution_count": 9, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "#Stack을 사용해 보자\n", 89 | "#Sam_string\n", 90 | "# 문자열의 길이 알아내기\n", 91 | "# 문자열의 첫번째문자와 마지막 문자 교환\n", 92 | "# 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n", 93 | "\n", 94 | "len(Sam_string)\n", 95 | "\n", 96 | "Sam_string[0]\n", 97 | "Sam_string[-1]\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 16, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "def reverseString2(str):\n", 109 | " stack = []\n", 110 | " for ch in str:\n", 111 | " stack.append(ch)\n", 112 | " \n", 113 | " result = \"\"\n", 114 | " while len(stack) > 0:\n", 115 | " result += stack.pop()\n", 116 | " \n", 117 | " return result\n", 118 | "\n" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "# 선택 정렬" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": { 132 | "collapsed": true 133 | }, 134 | "outputs": [], 135 | "source": [] 136 | } 137 | ], 138 | "metadata": { 139 | "anaconda-cloud": {}, 140 | "kernelspec": { 141 | "display_name": "Python [conda root]", 142 | "language": "python", 143 | "name": "conda-root-py" 144 | }, 145 | "language_info": { 146 | "codemirror_mode": { 147 | "name": "ipython", 148 | "version": 3 149 | }, 150 | "file_extension": ".py", 151 | "mimetype": "text/x-python", 152 | "name": "python", 153 | "nbconvert_exporter": "python", 154 | "pygments_lexer": "ipython3", 155 | "version": "3.5.2" 156 | } 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 1 160 | } 161 | -------------------------------------------------------------------------------- /Algorithm/.ipynb_checkpoints/algo_basic-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1. Bubble Sort\n", 8 | "\n", 9 | "* performance: O(n^2)\n", 10 | "* space complexity O(1)\n", 11 | "\n", 12 | "Procedure: \n", 13 | "Loop1 \n", 14 | "6,5,3,1 / 5,6,3,1 / 5,3,6,1 /5,3,1,6 \n", 15 | "Loop2 \n", 16 | "3,5,1,6 / 3,1,5,6 / 3,1,5,6 \n", 17 | "Loop3 \n", 18 | "1,3,5,6 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "[1, 2, 3, 4, 5, 6]\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "import unittest\n", 38 | "\n", 39 | "def bubblesort(alist):\n", 40 | " for i in range(len(alist)-1):\n", 41 | " for j in range(len(alist)-1):\n", 42 | " if alist[j] > alist[j+1]:\n", 43 | " alist[j], alist[j+1] = alist[j+1], alist[j]\n", 44 | " return alist\n", 45 | " \n", 46 | "sort = [4,6,1,3,5,2]\n", 47 | "print(bubblesort(sort))\n", 48 | "\n", 49 | "class unit_test(unittest.TestCase):\n", 50 | " def test(self):\n", 51 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([4, 6, 1, 3, 5, 2]))\n", 52 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 4, 3, 1, 2, 5]))\n", 53 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 5, 4, 3, 2, 1]))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "# 2. 
Selection Sort\n", 61 | "\n", 62 | "* Worst case performance: O(n^2)\n", 63 | "* Best Case perform: O(n^2)\n", 64 | "* Avg. Case perform: O(n^2)\n", 65 | "* Worst case space complexity: O(n) total, O(1) auxilary\n", 66 | "\n", 67 | "Procedure: \n", 68 | "4,6,1,3,5,2 \n", 69 | "Min: 4 (첫 번째 포인트) \n", 70 | "4,6,1,3,5,2 \n", 71 | "Min: 1 \n", 72 | "\n", 73 | "1,6,4,3,5,2 \n", 74 | "Min: 6 (두 번째 포인트) \n", 75 | "1,6,4,3,5,2 \n", 76 | "Min: 2 \n", 77 | "\n", 78 | "1,2,4,3,5,6 \n", 79 | "Min: 4 (세 번째 포인트) " 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# 3. Insertion Sort\n", 87 | "\n", 88 | "쉽지만 성능이 낮음\n", 89 | "\n", 90 | "Procedure: \n", 91 | "4,6,1,3,5,2 \n", 92 | "\n", 93 | "4,6 \n", 94 | "4,1,6 \n", 95 | "1,4,6 \n", 96 | "\n", 97 | "1,4,6,3 \n", 98 | "1,4,3,6 \n", 99 | "1,3,4,6 \n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# 4. Merge Sort\n", 107 | "\n", 108 | "Perform: O(nlogn)\n", 109 | "space complex: O(n)\n", 110 | "\n", 111 | "1. 정렬되지 않은 리스트를 지속적으로 쪼갠다\n", 112 | "2. 정렬된 아이템과 병합한다.\n", 113 | "\n", 114 | "Procedure: \n", 115 | "6,2,4,1,3,6,5,8\n", 116 | "\n", 117 | "Step1: \n", 118 | "6,2,4,1 / 3,7,5,8 \n", 119 | "* 6,2,4,1\n", 120 | "\n", 121 | "6,2 / 4,1 \n", 122 | "6|2 / 4|1\n", 123 | "\n", 124 | "Step2:\n", 125 | "2|6 -> 2,6 -> 1|4 -> 1,4\n", 126 | "2,6 / 1,4 -> 1,2,4,6\n", 127 | "\n", 128 | "Step3:\n", 129 | "3/7 / 5/8\n", 130 | "\n" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "anaconda-cloud": {}, 136 | "kernelspec": { 137 | "display_name": "Python [conda root]", 138 | "language": "python", 139 | "name": "conda-root-py" 140 | }, 141 | "language_info": { 142 | "codemirror_mode": { 143 | "name": "ipython", 144 | "version": 3 145 | }, 146 | "file_extension": ".py", 147 | "mimetype": "text/x-python", 148 | "name": "python", 149 | "nbconvert_exporter": "python", 150 | "pygments_lexer": "ipython3", 151 | "version": "3.5.2" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 1 156 | } 157 | -------------------------------------------------------------------------------- /Algorithm/06-01_calc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Jul 29 16:01:08 2017 5 | 6 | @author: ryan 7 | 8 | #탑코더 06 알고리즘 9 | """ 10 | 11 | A = [] 12 | B = [] 13 | C = [] 14 | 15 | n = 5 16 | m = 5 17 | 18 | def calc(n, m): 19 | A 20 | for i in range(n): 21 | B 22 | i += 1 23 | print("count {} times B called".format(i)) 24 | for j in range(m): 25 | C 26 | j += 1 27 | print("count {} times C called".format(j)) 28 | 29 | 30 | calc(n, m) 31 | 32 | #계산량은 O(nm) 33 | #Top Coder 에서는 10^7 까지는 괜찮지만 10^8을 넘으면 안됨 34 | -------------------------------------------------------------------------------- /Algorithm/Algo_basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Jun 12 00:08:58 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | """ 10 | Bubble Sort 11 | performance: O(n^2) 12 | space complexity O(1) 13 | 14 | Procedure: 15 | Loop1 16 | 6,5,3,1 / 5,6,3,1 / 5,3,6,1 /5,3,1,6 17 | Loop2 18 | 3,5,1,6 / 3,1,5,6 / 3,1,5,6 19 | Loop3 20 | 1,3,5,6 21 | """ 22 | 23 | import unittest 24 | 25 | def bubblesort(alist): 26 | for i in range(len(alist)-1): 27 | for j in range(len(alist)-1): 28 | if alist[j] > alist[j+1]: 29 | alist[j], alist[j+1] = alist[j+1], alist[j] 30 | return 
alist 31 | 32 | sort = [4,6,1,3,5,2] 33 | bubblesort(sort) 34 | 35 | class unit_test(unittest.TestCase): 36 | def test(self): 37 | self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([4, 6, 1, 3, 5, 2])) 38 | self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 4, 3, 1, 2, 5])) 39 | self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 5, 4, 3, 2, 1])) 40 | 41 | 42 | -------------------------------------------------------------------------------- /Algorithm/HelloCoding_Algo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1.원래 저장위치에서 문자열을 역순으로 변환하기\n", 8 | "\n", 9 | "문자열 역순 변환\n", 10 | "- 문자열의 길이 알아내기\n", 11 | "- 문자열의 첫번째문자와 마지막 문자 교환\n", 12 | "- 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n", 13 | "\n", 14 | "위의 절차를 반복 (유니코드 한글에 대한 주의)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "#문자열 뒤집기 알고리즘\n", 26 | "\n", 27 | "#Sample String\n", 28 | "Sam_string = 'ABCD'" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "'DCBA'" 42 | ] 43 | }, 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "#Python은 심플하게 끝남\n", 51 | "\n", 52 | "def reverseString(str):\n", 53 | " return str[::-1]\n", 54 | "\n", 55 | "reverseString(Sam_string)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# 스택 (접착지 메모)\n", 63 | "\n", 64 | "- 스택에는 푸시와 팝이라는 두가지 연산\n", 65 | "- 모든 함수 호출은 호출 스택을 사용\n", 66 | "- 호출 스택은 너무 켜져 메모리를 크게 소모 할 수 도 있음" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 9, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "'D'" 80 | ] 81 | }, 82 | "execution_count": 9, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "#Stack을 사용해 보자\n", 89 | "#Sam_string\n", 90 | "# 문자열의 길이 알아내기\n", 91 | "# 문자열의 첫번째문자와 마지막 문자 교환\n", 92 | "# 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n", 93 | "\n", 94 | "len(Sam_string)\n", 95 | "\n", 96 | "Sam_string[0]\n", 97 | "Sam_string[-1]\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 16, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "def reverseString2(str):\n", 109 | " stack = []\n", 110 | " for ch in str:\n", 111 | " stack.append(ch)\n", 112 | " \n", 113 | " result = \"\"\n", 114 | " while len(stack) > 0:\n", 115 | " result += stack.pop()\n", 116 | " \n", 117 | " return result\n", 118 | "\n" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "# 선택 정렬" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": { 132 | "collapsed": true 133 | }, 134 | "outputs": [], 135 | "source": [] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## 1.1 문자열에 포함된 문자들이 전부 유일한지 검사 하는 알고리즘\n", 142 | "\n", 143 | "https://www.youtube.com/watch?v=piDwgBqmqKM&list=PLVNY1HnUlO24RlncfRjfoZHnD0YWVsvhq" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 1, 149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | 
"test1 = 'ABCD' #True\n", 155 | "test2 = 'ABAD' #False" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "fkdls;s;skfkfld;sfksld;sjflldldls;a'fkdls;dldldldldls;s;dldlfjdk'" 167 | ] 168 | } 169 | ], 170 | "metadata": { 171 | "anaconda-cloud": {}, 172 | "kernelspec": { 173 | "display_name": "Python [conda root]", 174 | "language": "python", 175 | "name": "conda-root-py" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.5.2" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 1 192 | } 193 | -------------------------------------------------------------------------------- /Algorithm/IsPrime.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 18 00:22:29 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | #Check whether Prime number or not 10 | 11 | def isPrime(num): 12 | if num > 0: 13 | 14 | if (num % 2) != 0: 15 | print("{} is prime num".format(num)) 16 | else: 17 | print("{} is not prime num".format(num)) 18 | 19 | else: 20 | print("input value must be greater than zero") 21 | 22 | a = -3 23 | 24 | isPrime(a) -------------------------------------------------------------------------------- /Algorithm/algo_basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1. Bubble Sort\n", 8 | "\n", 9 | "* performance: O(n^2)\n", 10 | "* space complexity O(1)\n", 11 | "\n", 12 | "Procedure: \n", 13 | "Loop1 \n", 14 | "6,5,3,1 / 5,6,3,1 / 5,3,6,1 /5,3,1,6 \n", 15 | "Loop2 \n", 16 | "3,5,1,6 / 3,1,5,6 / 3,1,5,6 \n", 17 | "Loop3 \n", 18 | "1,3,5,6 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "[1, 2, 3, 4, 5, 6]\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "import unittest\n", 38 | "\n", 39 | "def bubblesort(alist):\n", 40 | " for i in range(len(alist)-1):\n", 41 | " for j in range(len(alist)-1):\n", 42 | " if alist[j] > alist[j+1]:\n", 43 | " alist[j], alist[j+1] = alist[j+1], alist[j]\n", 44 | " return alist\n", 45 | " \n", 46 | "sort = [4,6,1,3,5,2]\n", 47 | "print(bubblesort(sort))\n", 48 | "\n", 49 | "class unit_test(unittest.TestCase):\n", 50 | " def test(self):\n", 51 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([4, 6, 1, 3, 5, 2]))\n", 52 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 4, 3, 1, 2, 5]))\n", 53 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 5, 4, 3, 2, 1]))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "# 2. Selection Sort\n", 61 | "\n", 62 | "* Worst case performance: O(n^2)\n", 63 | "* Best Case perform: O(n^2)\n", 64 | "* Avg. 
Case perform: O(n^2)\n", 65 | "* Worst case space complexity: O(n) total, O(1) auxilary\n", 66 | "\n", 67 | "Procedure: \n", 68 | "4,6,1,3,5,2 \n", 69 | "Min: 4 (첫 번째 포인트) \n", 70 | "4,6,1,3,5,2 \n", 71 | "Min: 1 \n", 72 | "\n", 73 | "1,6,4,3,5,2 \n", 74 | "Min: 6 (두 번째 포인트) \n", 75 | "1,6,4,3,5,2 \n", 76 | "Min: 2 \n", 77 | "\n", 78 | "1,2,4,3,5,6 \n", 79 | "Min: 4 (세 번째 포인트) " 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# 3. Insertion Sort\n", 87 | "\n", 88 | "쉽지만 성능이 낮음\n", 89 | "\n", 90 | "Procedure: \n", 91 | "4,6,1,3,5,2 \n", 92 | "\n", 93 | "4,6 \n", 94 | "4,1,6 \n", 95 | "1,4,6 \n", 96 | "\n", 97 | "1,4,6,3 \n", 98 | "1,4,3,6 \n", 99 | "1,3,4,6 \n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# 4. Merge Sort\n", 107 | "\n", 108 | "Perform: O(nlogn)\n", 109 | "space complex: O(n)\n", 110 | "\n", 111 | "1. 정렬되지 않은 리스트를 지속적으로 쪼갠다\n", 112 | "2. 정렬된 아이템과 병합한다.\n", 113 | "\n", 114 | "Procedure: \n", 115 | "6,2,4,1,3,6,5,8\n", 116 | "\n", 117 | "Step1: \n", 118 | "6,2,4,1 / 3,7,5,8 \n", 119 | "* 6,2,4,1\n", 120 | "\n", 121 | "6,2 / 4,1 \n", 122 | "6|2 / 4|1\n", 123 | "\n", 124 | "Step2: \n", 125 | "2|6 -> 2,6 -> 1|4 -> 1,4 \n", 126 | "2,6 / 1,4 -> 1,2,4,6\n", 127 | "\n", 128 | "Step3: \n", 129 | "3|7 / 5|8\n", 130 | "\n", 131 | "Step4: \n", 132 | "3|7 -> 3,7 -> 5|8 -> 5,8 \n", 133 | "3,7 / 5,8 -> 3,5,7,8\n", 134 | "\n", 135 | "Step5: \n", 136 | "1,2,4,6 / 3,5,7,8 -> 1,2,3,4,5,6,7,8\n", 137 | "\n", 138 | "\n", 139 | "\n" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": true 147 | }, 148 | "outputs": [], 149 | "source": [] 150 | } 151 | ], 152 | "metadata": { 153 | "anaconda-cloud": {}, 154 | "kernelspec": { 155 | "display_name": "Python [conda root]", 156 | "language": "python", 157 | "name": "conda-root-py" 158 | }, 159 | "language_info": { 160 | "codemirror_mode": { 161 | "name": "ipython", 162 | "version": 3 163 | }, 164 | "file_extension": ".py", 165 | "mimetype": "text/x-python", 166 | "name": "python", 167 | "nbconvert_exporter": "python", 168 | "pygments_lexer": "ipython3", 169 | "version": "3.5.2" 170 | } 171 | }, 172 | "nbformat": 4, 173 | "nbformat_minor": 1 174 | } 175 | -------------------------------------------------------------------------------- /Algorithm/binary_search_1.py: -------------------------------------------------------------------------------- 1 | #Binary search 2 | 3 | alist = [2,4,6,8,10,14,21] 4 | 5 | exp_val = 14 6 | 7 | #hint1 8 | low = 0 9 | high = len(alist) - 1 10 | mid = (low + high) // 2 11 | guess = alist[mid] 12 | 13 | #hint2 14 | if guess < item: 15 | low = mid + 1 16 | 17 | #hint3: cannot solve 18 | def binarySearch(list, item): 19 | low = 0 20 | high = len(list) - 1 21 | 22 | while low <= high: # Key point 23 | mid = (low + high) // 2 24 | guess = list[mid] 25 | if guess == item: 26 | return mid 27 | elif guess > item: 28 | high = mid - 1 29 | else: 30 | low = mid + 1 31 | return None 32 | 33 | binarySearch(alist, exp_val) 34 | 35 | 36 | 37 | #my code 38 | i = 0 39 | while i > 100: 40 | i += 1 41 | if guess < exp_val: 42 | mid = (mid + high) // 2 43 | guess = alist[mid] 44 | elif guess > exp_val: 45 | mid = (mid + low) // 2 46 | guess = alist[mid] 47 | else: 48 | print("Value Location {}, Value {}".format(mid, guess)) 49 | break 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /Algorithm/euler_prob1.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Algorithm/euler_prob1.py -------------------------------------------------------------------------------- /Algorithm/graph_algo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Algorithm/graph_algo.py -------------------------------------------------------------------------------- /Algorithm/selection_sort_2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 22:49:03 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | -------------------------------------------------------------------------------- /Chatbot/.ipynb_checkpoints/seq2seq-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/.Rhistory: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/.Rhistory -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/__pycache__/mcbot_chat.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/__pycache__/mcbot_chat.cpython-35.pyc -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/data/desktop.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/data/desktop.ini -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/lstm_bot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 20:02:25 2017 5 | 소설 사이트 다운로드:http://blog.naver.com/PostView.nhn?blogId=dmswjd5366&logNo=220010721513 6 | @author: ryan 7 | """ 8 | 9 | import codecs 10 | from bs4 import BeautifulSoup 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Activation, Dropout 13 | from keras.layers import LSTM 14 | from keras.optimizers import RMSprop 15 | from keras.utils.data_utils import get_file 16 | import numpy as np 17 | import random, sys 18 | 19 | import os 20 | import pandas as pd 21 | import json 22 | 23 | #import chardet 24 | 25 | #data = pd.read_csv("/home/ryan/nlp_ryan/Chatbot/Slack_Bot/data/toji1.txt", "r", encoding="utf-8") 26 | fp = codecs.open("/home/ryan/nlp_ryan/Chatbot/Slack_Bot/data/toji1.txt", "r", encoding="utf-8") 27 | #soup = BeautifulSoup(fp, "html.parser") 28 | #body = soup.select_one("body") 29 | 
#text = body.getText() + " " 30 | 31 | print('코퍼스의 길이: ', len(dic)) 32 | # 문자를 하나하나 읽어 들이고 ID 붙이기 33 | chars = sorted(list(set(text))) 34 | print('사용되고 있는 문자의 수:', len(chars)) 35 | char_indices = dict((c, i) for i, c in enumerate(chars)) # 문자 → ID 36 | indices_char = dict((i, c) for i, c in enumerate(chars)) # ID → 문자 37 | # 텍스트를 maxlen개의 문자로 자르고 다음에 오는 문자 등록하기 38 | maxlen = 20 39 | step = 3 40 | sentences = [] 41 | next_chars = [] 42 | for i in range(0, len(text) - maxlen, step): 43 | sentences.append(text[i: i + maxlen]) 44 | next_chars.append(text[i + maxlen]) 45 | print('학습할 구문의 수:', len(sentences)) 46 | print('텍스트를 ID 벡터로 변환합니다...') 47 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 48 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 49 | for i, sentence in enumerate(sentences): 50 | for t, char in enumerate(sentence): 51 | X[i, t, char_indices[char]] = 1 52 | y[i, char_indices[next_chars[i]]] = 1 53 | # 모델 구축하기(LSTM) 54 | print('모델을 구축합니다...') 55 | model = Sequential() 56 | model.add(LSTM(128, input_shape=(maxlen, len(chars)))) 57 | model.add(Dense(len(chars))) 58 | model.add(Activation('softmax')) 59 | optimizer = RMSprop(lr=0.01) 60 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 61 | # 후보를 배열에서 꺼내기 62 | def sample(preds, temperature=1.0): 63 | preds = np.asarray(preds).astype('float64') 64 | preds = np.log(preds) / temperature 65 | exp_preds = np.exp(preds) 66 | preds = exp_preds / np.sum(exp_preds) 67 | probas = np.random.multinomial(1, preds, 1) 68 | return np.argmax(probas) 69 | # 학습시키고 텍스트 생성하기 반복 70 | for iteration in range(1, 60): 71 | print() 72 | print('-' * 50) 73 | print('반복 =', iteration) 74 | model.fit(X, y, batch_size=128, nb_epoch=1) # 75 | # 임의의 시작 텍스트 선택하기 76 | start_index = random.randint(0, len(text) - maxlen - 1) 77 | # 다양한 다양성의 문장 생성 78 | for diversity in [0.2, 0.5, 1.0, 1.2]: 79 | print() 80 | print('--- 다양성 = ', diversity) 81 | generated = '' 82 | sentence = text[start_index: start_index + maxlen] 83 | generated += sentence 84 | print('--- 시드 = "' + sentence + '"') 85 | sys.stdout.write(generated) 86 | # 시드를 기반으로 텍스트 자동 생성 87 | for i in range(400): 88 | x = np.zeros((1, maxlen, len(chars))) 89 | for t, char in enumerate(sentence): 90 | x[0, t, char_indices[char]] = 1. 
91 | # 다음에 올 문자를 예측하기 92 | preds = model.predict(x, verbose=0)[0] 93 | next_index = sample(preds, diversity) 94 | next_char = indices_char[next_index] 95 | # 출력하기 96 | generated += next_char 97 | sentence = sentence[1:] + next_char 98 | sys.stdout.write(next_char) 99 | sys.stdout.flush() 100 | print() -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/markov_chain_bot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 20:02:25 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | import codecs 10 | from bs4 import BeautifulSoup 11 | from konlpy.tag import Twitter 12 | import urllib.request 13 | import os, re, json, random 14 | # 마르코프 체인 딕셔너리 만들기 --- (※1) 15 | def make_dic(words): 16 | tmp = ["@"] 17 | dic = {} 18 | for word in words: 19 | tmp.append(word) 20 | if len(tmp) < 3: continue 21 | if len(tmp) > 3: tmp = tmp[1:] 22 | set_word3(dic, tmp) 23 | if word == ".": 24 | tmp = ["@"] 25 | continue 26 | return dic 27 | # 딕셔너리에 데이터 등록하기 --- (※2) 28 | def set_word3(dic, s3): 29 | w1, w2, w3 = s3 30 | if not w1 in dic: dic[w1] = {} 31 | if not w2 in dic[w1]: dic[w1][w2] = {} 32 | if not w3 in dic[w1][w2]: dic[w1][w2][w3] = 0 33 | dic[w1][w2][w3] += 1 34 | 35 | # 문장 만들기 --- (※3) 36 | def make_sentence(dic): 37 | ret = [] 38 | if not "@" in dic: return "no dic" 39 | top = dic["@"] 40 | w1 = word_choice(top) 41 | w2 = word_choice(top[w1]) 42 | ret.append(w1) 43 | ret.append(w2) 44 | while True: 45 | w3 = word_choice(dic[w1][w2]) 46 | ret.append(w3) 47 | if w3 == ".": break 48 | w1, w2 = w2, w3 49 | ret = "".join(ret) 50 | # 띄어쓰기 51 | params = urllib.parse.urlencode({ 52 | "_callback": "", 53 | "q": ret 54 | }) 55 | # 네이버 맞춤법 검사기를 사용합니다. 56 | data = urllib.request.urlopen("https://m.search.naver.com/p/csearch/dcontent/spellchecker.nhn?" 
+ params) 57 | data = data.read().decode("utf-8")[1:-2] 58 | data = json.loads(data) 59 | data = data["message"]["result"]["html"] 60 | #data = soup = BeautifulSoup(data, "html.parser").getText() 61 | data = BeautifulSoup(data, "html.parser").getText() 62 | 63 | # 리턴 64 | return data 65 | 66 | def word_choice(sel): 67 | keys = sel.keys() 68 | return random.choice(list(keys)) 69 | 70 | # 문장 읽어 들이기 --- (※4) 71 | toji_file = "toji.txt" 72 | dict_file = "/home/ryan/nlp_ryan/Chatbot/Slack_Bot/markov-toji.json" 73 | 74 | if not os.path.exists(dict_file): 75 | # 토지 텍스트 파일 읽어 들이기 76 | fp = codecs.open("BEXX0003.txt", "r", encoding="utf-16") 77 | soup = BeautifulSoup(fp, "html.parser") 78 | body = soup.select_one("body > text") 79 | text = body.getText() 80 | text = text.replace("…", "") # 현재 koNLPy가 …을 구두점으로 잡지 못하는 문제 임시 해결 81 | # 형태소 분석 82 | twitter = Twitter() 83 | malist = twitter.pos(text, norm=True) 84 | words = [] 85 | for word in malist: 86 | # 구두점 등은 대상에서 제외(단 마침표는 포함) 87 | if not word[1] in ["Punctuation"]: 88 | words.append(word[0]) 89 | if word[0] == ".": 90 | words.append(word[0]) 91 | # 딕셔너리 생성 92 | dic = make_dic(words) 93 | json.dump(dic, open(dict_file,"w", encoding="utf-8")) 94 | else: 95 | dic = json.load(open(dict_file,"r")) 96 | # 문장 만들기 --- (※6) 97 | 98 | 99 | 100 | for i in range(3): 101 | s = make_sentence(dic) 102 | print(s) 103 | print("---") -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/mcbot_chat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 20:02:25 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | import codecs 10 | from bs4 import BeautifulSoup 11 | from konlpy.tag import Twitter 12 | import urllib.request 13 | import os, re, json, random 14 | 15 | # 문장 읽어 들이기 --- (※4) 16 | dict_file = "/home/ryan/nlp_ryan/Chatbot/Slack_Bot/markov-toji.json" 17 | dic = json.load(open(dict_file,"r")) 18 | 19 | # 문장 만들기 --- (※3) 20 | def make_sentence(dic): 21 | ret = [] 22 | if not "@" in dic: return "no dic" 23 | top = dic["@"] 24 | w1 = word_choice(top) 25 | w2 = word_choice(top[w1]) 26 | ret.append(w1) 27 | ret.append(w2) 28 | while True: 29 | w3 = word_choice(dic[w1][w2]) 30 | ret.append(w3) 31 | if w3 == ".": break 32 | w1, w2 = w2, w3 33 | ret = "".join(ret) 34 | # 띄어쓰기 35 | params = urllib.parse.urlencode({ 36 | "_callback": "", 37 | "q": ret 38 | }) 39 | # 네이버 맞춤법 검사기를 사용합니다. 40 | data = urllib.request.urlopen("https://m.search.naver.com/p/csearch/dcontent/spellchecker.nhn?" 
+ params) 41 | data = data.read().decode("utf-8")[1:-2] 42 | data = json.loads(data) 43 | data = data["message"]["result"]["html"] 44 | #data = soup = BeautifulSoup(data, "html.parser").getText() 45 | data = BeautifulSoup(data, "html.parser").getText() 46 | 47 | # 리턴 48 | return data 49 | 50 | def word_choice(sel): 51 | keys = sel.keys() 52 | return random.choice(list(keys)) -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/modubot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 15:35:47 2017 5 | 6 | modu-deepnlp 7 | modubot 8 | 9 | http://www.usefulparadigm.com/2016/04/06/creating-a-slack-bot-with-aws-lambda-and-api-gateway/ 10 | https://www.fullstackpython.com/blog/build-first-slack-bot-python.html 11 | 12 | @author: ryan 13 | https://hooks.slack.com/services/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 14 | 15 | """ 16 | 17 | import sys 18 | sys.path.append('/home/ryan/nlp_ryan/Chatbot/Slack_Bot') 19 | from mcbot_chat import make_sentence 20 | import os, re, json, random 21 | 22 | dict_file = "/home/ryan/nlp_ryan/Chatbot/Slack_Bot/markov-toji.json" 23 | dic = json.load(open(dict_file,"r")) 24 | 25 | import os 26 | import time 27 | from slackclient import SlackClient 28 | import random 29 | 30 | #Bot ID & Token 31 | #slack_client.api_call("api.test") 32 | BOT_NAME = 'modubot' 33 | BOT_ID = 'U5Z492W0J' 34 | slack_token = 'your token' 35 | 36 | #export BOT_NAME='modubot' 37 | #export slack_token='xoxb-203145098018-UFRw9AIzGDiZcuc4aSF1kFdl' 38 | 39 | # instantiate Slack & Twilio clients 40 | slack_client = SlackClient(slack_token) 41 | 42 | #Check if everything is alright 43 | is_ok = slack_client.api_call("users.list").get('ok') 44 | 45 | # find the id of our slack bot 46 | if(is_ok): 47 | for user in slack_client.api_call("users.list").get('members'): 48 | if user.get('name') == BOT_ID: 49 | print(user.get('id')) 50 | 51 | # how the bot is mentioned on slack 52 | def get_mention(user): 53 | return '<@{user}>'.format(user=user) 54 | 55 | slack_mention = get_mention(BOT_ID) 56 | 57 | #Start Chatbot 58 | SOCKET_DELAY = 1 59 | 60 | def is_private(event): 61 | """Checks if private slack channel""" 62 | return event.get('channel').startswith('D') 63 | 64 | def is_for_me(event): 65 | #chekc if not my own event 66 | type = event.get('type') 67 | if type and type == 'message' and not(event.get('user')==BOT_ID): 68 | #in case it is a private message 69 | if is_private(event): 70 | return True 71 | #in case it is not a private 72 | text = event.get('text') 73 | channel = event.get('channel') 74 | if slack_mention in text.strip().split(): 75 | return True 76 | 77 | def post_message(message, channel): 78 | slack_client.api_call('chat.postMessage', channel=channel, 79 | text=message, as_user=True) 80 | 81 | import nltk 82 | 83 | def is_hi(message): 84 | tokens = [word.lower() for word in message.strip().split()] 85 | return any(g in tokens 86 | for g in ['안녕', '안녕하세요', '테스트']) 87 | 88 | def is_bye(message): 89 | tokens = [word.lower() for word in message.strip().split()] 90 | return any(g in tokens 91 | for g in ['bye', 'goodbye', 'revoir', 'adios', 'later', 'cya']) 92 | 93 | def say_hi(user_mention): 94 | """Say Hi to a user by formatting their mention""" 95 | response_template = random.choice([make_sentence(dic)]) 96 | return response_template.format(mention=user_mention) 97 | 98 | def say_bye(user_mention): 99 | """Say Goodbye 
to a user""" 100 | response_template = random.choice(['see you later, alligator...', 101 | 'adios amigo', 102 | 'Bye {mention}!', 103 | 'Au revoir!']) 104 | return response_template.format(mention=user_mention) 105 | 106 | 107 | def handle_message(message, user, channel): 108 | if is_hi(message): 109 | user_mention = get_mention(user) 110 | post_message(message=say_hi(user_mention), channel=channel) 111 | elif is_bye(message): 112 | user_mention = get_mention(user) 113 | post_message(message=say_bye(user_mention), channel=channel) 114 | 115 | def run(): 116 | if slack_client.rtm_connect(): 117 | print('[.] modubot is ON...') 118 | while True: 119 | event_list = slack_client.rtm_read() 120 | if len(event_list) > 0: 121 | for event in event_list: 122 | print(event) 123 | if is_for_me(event): 124 | handle_message(message=event.get('text'), user=event.get('user'), channel=event.get('channel')) 125 | time.sleep(SOCKET_DELAY) 126 | else: 127 | print('[!] Connection to Slack failed.') 128 | 129 | if __name__=='__main__': 130 | run() 131 | -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/print_bot_id.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 15:35:47 2017 5 | 6 | modu-deepnlp 7 | modubot 8 | @author: ryan 9 | https://hooks.slack.com/services/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 10 | 11 | """ 12 | 13 | import os 14 | from slackclient import SlackClient 15 | 16 | token = 'your token' 17 | slack_client = SlackClient(token) 18 | #slack_client = SlackClient(os.environ.get('SLACK_BOT_TOKEN')) 19 | print(slack_client.api_call("api.test")) 20 | print(slack_client.api_call("api.test")) 21 | 22 | if __name__ == "__main__": 23 | api_call = slack_client.api_call("users.list") 24 | if api_call.get('ok'): 25 | # retrieve all users so we can find our bot 26 | users = api_call.get('members') 27 | for user in users: 28 | if 'name' in user and user.get('name') == BOT_NAME: 29 | print("Bot ID for '" + user['name'] + "' is " + user.get('id')) 30 | else: 31 | print("could not find bot user with the name " + BOT_NAME) 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/toji.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/toji.model -------------------------------------------------------------------------------- /Chatbot/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Aug 10 16:42:07 2017 5 | 6 | @author: naver 7 | """ 8 | 9 | -------------------------------------------------------------------------------- /Chatbot/__pycache__/helpers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/__pycache__/helpers.cpython-36.pyc -------------------------------------------------------------------------------- /Chatbot/helpers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Aug 10 16:38:16 2017 5 | 6 | @author: naver 7 | """ 8 | 9 | 
import numpy as np 10 | 11 | def batch(inputs, max_sequence_length=None): 12 | """ 13 | Args: 14 | inputs: 15 | list of sentences (integer lists) 16 | max_sequence_length: 17 | integer specifying how large should `max_time` dimension be. 18 | If None, maximum sequence length would be used 19 | 20 | Outputs: 21 | inputs_time_major: 22 | input sentences transformed into time-major matrix 23 | (shape [max_time, batch_size]) padded with 0s 24 | sequence_lengths: 25 | batch-sized list of integers specifying amount of active 26 | time steps in each input sequence 27 | """ 28 | 29 | sequence_lengths = [len(seq) for seq in inputs] 30 | batch_size = len(inputs) 31 | 32 | if max_sequence_length is None: 33 | max_sequence_length = max(sequence_lengths) 34 | 35 | inputs_batch_major = np.zeros(shape=[batch_size, max_sequence_length], dtype=np.int32) # == PAD 36 | 37 | for i, seq in enumerate(inputs): 38 | for j, element in enumerate(seq): 39 | inputs_batch_major[i, j] = element 40 | 41 | # [batch_size, max_time] -> [max_time, batch_size] 42 | inputs_time_major = inputs_batch_major.swapaxes(0, 1) 43 | 44 | return inputs_time_major, sequence_lengths 45 | 46 | 47 | def random_sequences(length_from, length_to, 48 | vocab_lower, vocab_upper, 49 | batch_size): 50 | """ Generates batches of random integer sequences, 51 | sequence length in [length_from, length_to], 52 | vocabulary in [vocab_lower, vocab_upper] 53 | """ 54 | if length_from > length_to: 55 | raise ValueError('length_from > length_to') 56 | 57 | def random_length(): 58 | if length_from == length_to: 59 | return length_from 60 | return np.random.randint(length_from, length_to + 1) 61 | 62 | while True: 63 | yield [ 64 | np.random.randint(low=vocab_lower, 65 | high=vocab_upper, 66 | size=random_length()).tolist() 67 | for _ in range(batch_size) 68 | ] -------------------------------------------------------------------------------- /Chatbot/seq2seq.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "\n", 13 | "def batch(inputs, max_sequence_length=None):\n", 14 | " \"\"\"\n", 15 | " Args:\n", 16 | " inputs:\n", 17 | " list of sentences (integer lists)\n", 18 | " max_sequence_length:\n", 19 | " integer specifying how large should `max_time` dimension be.\n", 20 | " If None, maximum sequence length would be used\n", 21 | " \n", 22 | " Outputs:\n", 23 | " inputs_time_major:\n", 24 | " input sentences transformed into time-major matrix \n", 25 | " (shape [max_time, batch_size]) padded with 0s\n", 26 | " sequence_lengths:\n", 27 | " batch-sized list of integers specifying amount of active \n", 28 | " time steps in each input sequence\n", 29 | " \"\"\"\n", 30 | " \n", 31 | " sequence_lengths = [len(seq) for seq in inputs]\n", 32 | " batch_size = len(inputs)\n", 33 | " \n", 34 | " if max_sequence_length is None:\n", 35 | " max_sequence_length = max(sequence_lengths)\n", 36 | " \n", 37 | " inputs_batch_major = np.zeros(shape=[batch_size, max_sequence_length], dtype=np.int32) # == PAD\n", 38 | " \n", 39 | " for i, seq in enumerate(inputs):\n", 40 | " for j, element in enumerate(seq):\n", 41 | " inputs_batch_major[i, j] = element\n", 42 | "\n", 43 | " # [batch_size, max_time] -> [max_time, batch_size]\n", 44 | " inputs_time_major = inputs_batch_major.swapaxes(0, 1)\n", 45 | "\n", 46 | " return inputs_time_major, 
sequence_lengths\n", 47 | "\n", 48 | "\n", 49 | "def random_sequences(length_from, length_to,\n", 50 | " vocab_lower, vocab_upper,\n", 51 | " batch_size):\n", 52 | " \"\"\" Generates batches of random integer sequences,\n", 53 | " sequence length in [length_from, length_to],\n", 54 | " vocabulary in [vocab_lower, vocab_upper]\n", 55 | " \"\"\"\n", 56 | " if length_from > length_to:\n", 57 | " raise ValueError('length_from > length_to')\n", 58 | "\n", 59 | " def random_length():\n", 60 | " if length_from == length_to:\n", 61 | " return length_from\n", 62 | " return np.random.randint(length_from, length_to + 1)\n", 63 | " \n", 64 | " while True:\n", 65 | " yield [\n", 66 | " np.random.randint(low=vocab_lower,\n", 67 | " high=vocab_upper,\n", 68 | " size=random_length()).tolist()\n", 69 | " for _ in range(batch_size)\n", 70 | " ]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 6, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "x = [[5, 7, 8], [6, 3], [3], [1]]\n", 80 | "\n", 81 | "xt, xlen = batch(x)" 82 | ] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Python 3", 88 | "language": "python", 89 | "name": "python3" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.6.1" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 2 106 | } 107 | -------------------------------------------------------------------------------- /Dataset/dataset.md: -------------------------------------------------------------------------------- 1 | Premade Datasets 2 | 1. http://research.microsoft.com/en-us/um/redmond/projects/mctest/index.html 3 | MCTest is a freely available set of 660 stories and associated questions intended for research on the machine comprehension of text. 4 | 2. http://www.gutenberg.org/wiki/Gutenberg:Offline_Catalogs 5 | Gutenberge has a lot of books 6 | 3. https://catalog.ldc.upenn.edu/LDC2006T13 7 | Web 1T 5-gram Version 1, contributed by Google Inc., contains English word n-grams and their observed frequency counts. 8 | The length of the n-grams ranges from unigrams (single words) to five-grams. 9 | This data is expected to be useful for statistical language modeling, e.g., for machine translation or speech recognition, etc. 10 | 4. http://www.iesl.cs.umass.edu/data 11 | A lot of datasets 12 | 5. http://webdatacommons.org/webtables/ 13 | A subset of the HTML tables on the Web contains relational data which can be useful for various applications. 14 | The Web Data Commons project has extracted two large corpora of relational Web tables from the Common Crawl and offers them for public download. 15 | This page provides an overview of the corpora as well as their use cases. 16 | 6. http://statmt.org/ngrams/ 17 | Unpruend Unpruned 5-gram counts and language models trained on 9 billion web pages -- Large amounts of raw data in many languages 18 | 7. https://en.wikipedia.org/wiki/Wikipedia:Database_download 19 | Wikipedia Database Download 20 | 8. https://aws.amazon.com/ko/datasets/google-books-ngrams/ 21 | A data set containing Google Books n-gram corpora. 22 | 9. https://aws.amazon.com/ko/public-datasets/common-crawl/ 23 | The Common Crawl corpus includes web crawl data collected over 8 years. 
24 | Common Crawl offers the largest, most comprehensive, open repository of web crawl data on the cloud. 25 | 10. http://commoncrawl.org/the-data/tutorials/ 26 | 착한 아이들 ㅋㅋ 27 | 11. https://wikireverse.org/data 28 | The full dataset of 36 million links can be downloaded as a torrent. 29 | The download is a tarball containing 4 tab-delimited files. 30 | The data is 1.1 GB when compressed and 5.4 GB when extracted. 31 | 12. https://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.html 32 | This corpus contains a large metadata-rich collection of fictional conversations extracted from raw movie scripts. 33 | 13. https://www.uow.edu.au/~dlee/corpora.htm 34 | several dozens of english corpus 35 | 14. http://research.google.com/research-outreach.html#/research-outreach/research-datasets 36 | Google Datasets 37 | 15. http://www.cs.cornell.edu/home/llee/data/ 38 | Collection of Cornell Datasets 39 | 16. https://github.com/rkadlec/ubuntu-ranking-dataset-creator 40 | Ubuntu Dialogue Datasets 41 | 17. http://ebiquity.umbc.edu/resource/html/id/351 42 | The UMBC webBase corpus (http://ebiq.org/r/351) is a dataset containing a collection of English paragraphs with over three billion words 43 | processed from the February 2007 crawl from the Stanford WebBase project (http://bit.ly/WebBase). Compressed, it is about 13GB in size. 44 | 45 | 46 | 47 | Movie Subtitles Datasets (BE AWARE OF COPYRIGHTS!!!) 48 | 49 | http://www.opensubtitles.org/en/search 50 | https://subscene.com/ 51 | http://www.moviesubtitles.org/ 52 | http://www.divxsubtitles.net/ 53 | http://www.subs4free.com/ 54 | 55 | https://videoconverter.iskysoft.com/video-tips/download-subtitles.html (15 Best Subtitle Software and Top 10 Subtitle Download Sites) 56 | 57 | 58 | 59 | Q&A Datasets 60 | https://www.researchgate.net/post/What_are_the_datasets_available_for_question_answering_system 61 | https://archive.org/details/stackexchange 62 | https://rajpurkar.github.io/SQuAD-explorer/ 63 | https://www.quora.com/Datasets-How-can-I-get-corpus-of-a-question-answering-website-like-Quora-or-Yahoo-Answers-or-Stack-Overflow-for-analyzing-answer-quality 64 | http://jmcauley.ucsd.edu/data/amazon/qa/ 65 | 66 | 67 | A lot of Datasets 68 | https://www.reddit.com/r/datasets/comments/3bxlg7/i_have_every_publicly_available_reddit_comment/ 69 | https://github.com/caesar0301/awesome-public-datasets#natural-language 70 | 71 | 72 | Miscellaneous 73 | https://github.com/deepmind/rc-data 74 | http://u.cs.biu.ac.il/~koppel/BlogCorpus.htm 75 | http://wiki.dbpedia.org/Downloads2015-10 76 | https://aws.amazon.com/ko/datasets/google-books-ngrams/ -------------------------------------------------------------------------------- /HTML/code_academy.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Ship To It - Company Packing List 5 | 6 | 7 | 8 | 9 | 10 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
<tr><th>Company Name</th><th>Number of Items to Ship</th><th>Next Action</th></tr>
<tr><td>Adam's Greenworks</td><td>14</td><td>Package Items</td></tr>
<tr><td>Davie's Burgers</td><td>2</td><td>Send Invoice</td></tr>
<tr><td>Baker's Bike Shop</td><td>3</td><td>Send Invoice</td></tr>
<tr><td>Miss Sally's Southern</td><td>4</td><td>Ship</td></tr>
<tr><td>Summit Resort Rentals</td><td>4</td><td>Ship</td></tr>
<tr><td>Strike Fitness</td><td>1</td><td>Enter Order</td></tr>
60 | 61 | 62 | -------------------------------------------------------------------------------- /Keras_Basic/Keras_Cheat_Sheet_Python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Keras_Basic/Keras_Cheat_Sheet_Python.pdf -------------------------------------------------------------------------------- /Keras_Basic/Keras_basic_fin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 30 15:20:50 2017 5 | 6 | @author: ryan 7 | 8 | Most of infomation from DataCamp Keras Course 9 | https://www.datacamp.com/community/blog/new-course-deep-learning-in-python-first-keras-2-0-online-course#gs.8RUVmWM 10 | 11 | """ 12 | 13 | # Import necessary modules 14 | #import keras 15 | from keras.layers import Dense 16 | from keras.models import Sequential 17 | from keras.datasets import boston_housing 18 | from keras.wrappers.scikit_learn import KerasRegressor 19 | from sklearn.model_selection import cross_val_score 20 | from sklearn.model_selection import KFold 21 | import numpy as np 22 | 23 | (x_train, y_train), (x_test, y_test) = boston_housing.load_data() 24 | 25 | print(x_train.shape, y_train.shape) #(404, 13) / (404,) 26 | 27 | # Save the number of columns in training set: n_cols 28 | n_cols = x_train.shape[1] 29 | 30 | #Define Model for boston data 31 | 32 | # Set up the model: model 33 | model = Sequential() 34 | model.add(Dense(13, activation='relu', input_shape=(n_cols,), kernel_initializer = 'normal')) 35 | # Add the output layer 36 | model.add(Dense(1, kernel_initializer='normal')) 37 | #Complile model 일반적으로 Adam을 추천 (CS231 강의에서도 잘 모르겠으면 Adam 사용 추천) 38 | model.compile(optimizer='adam', loss='mean_squared_error') 39 | 40 | # Verify that model contains information from compiling 41 | print("Loss function: " + model.loss) 42 | 43 | """ 44 | 모델 학습 / 구조 확인 및 시각화 45 | """ 46 | model.summary() #모델의 구조를 확인 47 | # Fit the model 48 | history = model.fit(x_train, y_train, epochs=100) 49 | # Test the model 50 | '''Predictions''' 51 | # Calculate predictions: predictions 52 | score = model.evaluate(x_test, y_test) 53 | 54 | # list all data in history 55 | print(history.history.keys()) 56 | 57 | #Loss 시각화 58 | import matplotlib.pyplot as plt 59 | 60 | plt.plot(history.history['loss']) 61 | plt.title('model loss') 62 | plt.ylabel('loss') 63 | plt.xlabel('epoch') 64 | plt.legend(['train'], loc='upper left') 65 | plt.show() 66 | 67 | 68 | -------------------------------------------------------------------------------- /Keras_Basic/Keras_classification_basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 30 15:20:50 2017 5 | 6 | @author: ryan 7 | 8 | Most of infomation from DataCamp Keras Course 9 | https://www.datacamp.com/community/blog/new-course-deep-learning-in-python-first-keras-2-0-online-course#gs.8RUVmWM 10 | 11 | """ 12 | 13 | # Import necessary modules 14 | import keras 15 | from keras.layers import Dense 16 | from keras.models import Sequential 17 | from keras.utils import to_categorical 18 | 19 | # Convert the target to categorical: target 20 | target = to_categorical(df.survived) 21 | 22 | model = Sequential() 23 | model.add(Dense(32, activation='relu', input_shape=(n_cols,))) 24 | model.add(Dense(2, activation='softmax')) 25 | # 
-------------------------------------------------------------------------------- /Keras_Basic/Keras_classification_basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 30 15:20:50 2017 5 | 6 | @author: ryan 7 | 8 | Most of this information is from the DataCamp Keras course 9 | https://www.datacamp.com/community/blog/new-course-deep-learning-in-python-first-keras-2-0-online-course#gs.8RUVmWM 10 | 11 | """ 12 | 13 | # Import necessary modules 14 | import keras 15 | from keras.layers import Dense 16 | from keras.models import Sequential 17 | from keras.utils import to_categorical 18 | 19 | # Convert the target to categorical: target 20 | target = to_categorical(df.survived) 21 | 22 | model = Sequential() 23 | model.add(Dense(32, activation='relu', input_shape=(n_cols,))) 24 | model.add(Dense(2, activation='softmax')) 25 | # Compile the model 26 | model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy']) 27 | # Fit the model 28 | model.fit(predictors, target) 29 | 30 | '''Predictions''' 31 | # Calculate predictions: predictions 32 | predictions = model.predict(pred_data) 33 | 34 | # Calculate predicted probability of survival: predicted_prob_true 35 | predicted_prob_true = predictions[:,1] 36 | 37 | # print predicted_prob_true 38 | print(predicted_prob_true) 39 | 40 | ''' 41 | Save and Load 42 | ''' 43 | 44 | from keras.models import load_model 45 | model.save('my_model.h5') 46 | my_model = load_model('my_model.h5') 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /Keras_Basic/Keras_fine_tuning_basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 30 18:27:14 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | 10 | """ 11 | Model Optimization 12 | 13 | 1. loss options 14 | - mean_squared_error 15 | - mean_squared_logarithmic_error 16 | - mean_absolute_error 17 | - mean_absolute_percentage_error 18 | - binary_crossentropy 19 | - categorical_crossentropy 20 | 21 | 2. L1/L2 regularization 22 | 23 | from keras import regularizers 24 | model.add(Dense(50, input_dim=100, activation="sigmoid", kernel_regularizer=regularizers.l2(0.01))) 25 | 26 | 3. Dropout -> at the end, scale by multiplying the weights by the keep probability p 27 | 28 | model.add(Dropout(0.5)) 29 | model.compile(optimizer=SGD(0.5), loss='categorical_crossentropy', metrics=["acc"]) 30 | 31 | 4. Weight initialization 32 | model.add(Dense(100, input_dim=10, activation="sigmoid", kernel_initializer="uniform")) 33 | 34 | 5. Softmax 35 | 36 | model = Sequential() 37 | model.add(Dense(15, input_dim=100, activation='sigmoid', kernel_initializer='glorot_uniform')) 38 | model.add(Dense(10, activation='softmax', kernel_initializer='glorot_uniform')) 39 | model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=["accuracy"]) 40 | 41 | """ 42 | 43 | # Import the SGD optimizer 44 | from keras.optimizers import SGD 45 | 46 | # Create list of learning rates: lr_to_test 47 | lr_to_test = [.000001, 0.01, 1] 48 | 49 | # Loop over learning rates 50 | for lr in lr_to_test: 51 | print('\n\nTesting model with learning rate: %f\n'%lr ) 52 | 53 | # Build new model to test, unaffected by previous models 54 | model = get_new_model() 55 | 56 | # Create SGD optimizer with specified learning rate: my_optimizer 57 | my_optimizer = SGD(lr=lr) 58 | 59 | # Compile the model 60 | model.compile(optimizer = my_optimizer, loss = 'categorical_crossentropy') 61 | 62 | # Fit the model 63 | model.fit(predictors, target) 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | """ 72 | Model validation 73 | 74 | 75 | model.fit(predictors, target, validation_split=0.3) 76 | Early Stopping 77 | stop training when the validation score stops improving (patience) 78 | 79 | Experimentation 80 | - Experiment with different architectures 81 | - More layers 82 | - Fewer layers 83 | - Layers with more nodes 84 | - Layers with fewer nodes 85 | - Creating a great model requires experimentation 86 | 87 | """ 88 | #Validation Set 89 | # Save the number of columns in predictors: n_cols 90 | n_cols = predictors.shape[1] 91 | input_shape = (n_cols,) 92 | 93 | # Specify the model 94 | model = Sequential() 95 | model.add(Dense(100, activation='relu', input_shape = input_shape)) 96 | model.add(Dense(100, activation='relu')) 97 | model.add(Dense(2, activation='softmax')) 98 | 99 | # Compile the model 100 | model.compile(optimizer = 
'adam', loss = 'categorical_crossentropy', metrics=['accuracy']) 101 | 102 | # Fit the model 103 | hist = model.fit(predictors, target, validation_split=0.3) 104 | 105 | """ 106 | #Early Stopping 107 | """ 108 | 109 | # Import EarlyStopping 110 | from keras.callbacks import EarlyStopping 111 | 112 | # Save the number of columns in predictors: n_cols 113 | n_cols = predictors.shape[1] 114 | input_shape = (n_cols,) 115 | 116 | # Specify the model 117 | model = Sequential() 118 | model.add(Dense(100, activation='relu', input_shape = input_shape)) 119 | model.add(Dense(100, activation='relu')) 120 | model.add(Dense(2, activation='softmax')) 121 | 122 | # Compile the model 123 | model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) 124 | 125 | # Define early_stopping_monitor 126 | early_stopping_monitor = EarlyStopping(patience = 2) 127 | 128 | # Fit the model 129 | model.fit(predictors, target, epochs=30, validation_split=0.3, callbacks = [early_stopping_monitor]) 130 | 131 | """ 132 | ##Experimenting with wider networks 133 | 134 | verbose=False / logging output, tell me everything 135 | 136 | """ 137 | # Define early_stopping_monitor 138 | early_stopping_monitor = EarlyStopping(patience=2) 139 | 140 | # Create the new model: model_2 141 | model_2 = Sequential() 142 | 143 | # Add the first and second layers 144 | model_2.add(Dense(100, activation="relu", input_shape=input_shape)) 145 | model_2.add(Dense(100, activation="relu")) 146 | 147 | # Add the output layer 148 | model_2.add(Dense(2, activation="softmax")) 149 | 150 | # Compile model_2 151 | model_2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) 152 | 153 | # Fit model_1 154 | model_1_training = model_1.fit(predictors, target, epochs=15, validation_split=0.2, callbacks=[early_stopping_monitor], verbose=False) 155 | 156 | # Fit model_2 157 | model_2_training = model_2.fit(predictors, target, epochs=15, validation_split=0.2, callbacks=[early_stopping_monitor], verbose=False) 158 | 159 | # Create the plot 160 | plt.plot(model_1_training.history['val_loss'], 'r', model_2_training.history['val_loss'], 'b') 161 | plt.xlabel('Epochs') 162 | plt.ylabel('Validation score') 163 | plt.show() 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /Keras_Basic/Keras_tutorial_text_generation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "'''Example script to generate text from Nietzsche's writings.\n", 12 | "At least 20 epochs are required before the generated text\n", 13 | "starts sounding coherent.\n", 14 | "It is recommended to run this script on GPU, as recurrent\n", 15 | "networks are quite computationally intensive.\n", 16 | "If you try this script on new data, make sure your corpus\n", 17 | "has at least ~100k characters. 
~1M is better.\n", 18 | "'''\n", 19 | "\n", 20 | "from __future__ import print_function\n", 21 | "from keras.models import Sequential\n", 22 | "from keras.layers import Dense, Activation\n", 23 | "from keras.layers import LSTM\n", 24 | "from keras.optimizers import RMSprop\n", 25 | "from keras.utils.data_utils import get_file\n", 26 | "import numpy as np\n", 27 | "import random\n", 28 | "import sys\n", 29 | "\n", 30 | "path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')\n", 31 | "text = open(path).read().lower()\n", 32 | "print('corpus length:', len(text))\n", 33 | "\n", 34 | "chars = sorted(list(set(text)))\n", 35 | "print('total chars:', len(chars))\n", 36 | "char_indices = dict((c, i) for i, c in enumerate(chars))\n", 37 | "indices_char = dict((i, c) for i, c in enumerate(chars))\n", 38 | "\n", 39 | "# cut the text in semi-redundant sequences of maxlen characters\n", 40 | "maxlen = 40\n", 41 | "step = 3\n", 42 | "sentences = []\n", 43 | "next_chars = []\n", 44 | "for i in range(0, len(text) - maxlen, step):\n", 45 | " sentences.append(text[i: i + maxlen])\n", 46 | " next_chars.append(text[i + maxlen])\n", 47 | "print('nb sequences:', len(sentences))\n", 48 | "\n", 49 | "print('Vectorization...')\n", 50 | "X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)\n", 51 | "y = np.zeros((len(sentences), len(chars)), dtype=np.bool)\n", 52 | "for i, sentence in enumerate(sentences):\n", 53 | " for t, char in enumerate(sentence):\n", 54 | " X[i, t, char_indices[char]] = 1\n", 55 | " y[i, char_indices[next_chars[i]]] = 1\n", 56 | "\n", 57 | "\n", 58 | "# build the model: a single LSTM\n", 59 | "print('Build model...')\n", 60 | "model = Sequential()\n", 61 | "model.add(LSTM(128, input_shape=(maxlen, len(chars))))\n", 62 | "model.add(Dense(len(chars)))\n", 63 | "model.add(Activation('softmax'))\n", 64 | "\n", 65 | "optimizer = RMSprop(lr=0.01)\n", 66 | "model.compile(loss='categorical_crossentropy', optimizer=optimizer)\n", 67 | "\n", 68 | "\n", 69 | "def sample(preds, temperature=1.0):\n", 70 | " # helper function to sample an index from a probability array\n", 71 | " preds = np.asarray(preds).astype('float64')\n", 72 | " preds = np.log(preds) / temperature\n", 73 | " exp_preds = np.exp(preds)\n", 74 | " preds = exp_preds / np.sum(exp_preds)\n", 75 | " probas = np.random.multinomial(1, preds, 1)\n", 76 | " return np.argmax(probas)\n", 77 | "\n", 78 | "# train the model, output generated text after each iteration\n", 79 | "for iteration in range(1, 60):\n", 80 | " print()\n", 81 | " print('-' * 50)\n", 82 | " print('Iteration', iteration)\n", 83 | " model.fit(X, y,\n", 84 | " batch_size=128,\n", 85 | " epochs=1)\n", 86 | "\n", 87 | " start_index = random.randint(0, len(text) - maxlen - 1)\n", 88 | "\n", 89 | " for diversity in [0.2, 0.5, 1.0, 1.2]:\n", 90 | " print()\n", 91 | " print('----- diversity:', diversity)\n", 92 | "\n", 93 | " generated = ''\n", 94 | " sentence = text[start_index: start_index + maxlen]\n", 95 | " generated += sentence\n", 96 | " print('----- Generating with seed: \"' + sentence + '\"')\n", 97 | " sys.stdout.write(generated)\n", 98 | "\n", 99 | " for i in range(400):\n", 100 | " x = np.zeros((1, maxlen, len(chars)))\n", 101 | " for t, char in enumerate(sentence):\n", 102 | " x[0, t, char_indices[char]] = 1.\n", 103 | "\n", 104 | " preds = model.predict(x, verbose=0)[0]\n", 105 | " next_index = sample(preds, diversity)\n", 106 | " next_char = indices_char[next_index]\n", 107 | "\n", 108 | " generated += next_char\n", 
109 | " sentence = sentence[1:] + next_char\n", 110 | "\n", 111 | " sys.stdout.write(next_char)\n", 112 | " sys.stdout.flush()\n", 113 | " print()" 114 | ] 115 | } 116 | ], 117 | "metadata": { 118 | "anaconda-cloud": {}, 119 | "kernelspec": { 120 | "display_name": "Python [conda root]", 121 | "language": "python", 122 | "name": "conda-root-py" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.5.2" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 1 139 | } 140 | -------------------------------------------------------------------------------- /Machine_Comprehension/Attention_Keras/.Rhistory: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/.Rhistory -------------------------------------------------------------------------------- /Machine_Comprehension/Attention_Keras/CNNQA_weights.h5.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/CNNQA_weights.h5.7z -------------------------------------------------------------------------------- /Machine_Comprehension/Attention_Keras/KerasQA.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/KerasQA.ods -------------------------------------------------------------------------------- /Machine_Comprehension/Attention_Keras/embedding_data.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/embedding_data.h5 -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/DynamicMemoryNetwork.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/DynamicMemoryNetwork.pdf -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/bAbi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/bAbi.pdf -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/dataset/babi_tasks_1-20_v1-2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/dataset/babi_tasks_1-20_v1-2.tar.gz -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/image/algo_process1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/algo_process1.png -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/image/algo_process2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/algo_process2.png -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/image/algo_process3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/algo_process3.png -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/image/babi_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/babi_dataset.png -------------------------------------------------------------------------------- /Machine_Comprehension/Readme_MC.md: -------------------------------------------------------------------------------- 1 | # Machine Comprehension 2 | 3 | *Feedback: sungjin7127@gmail.com* 4 | 5 | ## Attention Keras 6 | * [Teaching Machines to Read and Comprehend (Paper) - DeepMind](https://arxiv.org/abs/1506.03340) 7 | * [Code to Generate](https://github.com/deepmind/rc-data) 8 | * [DeepMind Q&A Dataset (CNN, Daily Mail)](http://cs.nyu.edu/~kcho/DMQA/) 9 | * [by Keras](https://github.com/dandxy89/DeepLearning_MachineLearning/tree/master/Keras/Attention) -------------------------------------------------------------------------------- /Math_Stat/.Rhistory: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Math_Stat/.Rhistory -------------------------------------------------------------------------------- /Math_Stat/.ipynb_checkpoints/ML_Basic_Siraj Raval-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Intro_to_Math_of_Intelligence" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Starting gradient descent at b = 0, m = 0, error = 5565.107834483211\n", 22 | "Running...\n", 23 | "After 10000 iterations b = 0.6078985997054931, m = 1.4675440436333027, error = 112.31533427075733\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "#The optimal values of m and b can be actually calculated with way less effort than doing a linear regression. 
\n", 29 | "#this is just to demonstrate gradient descent\n", 30 | "\n", 31 | "from numpy import *\n", 32 | "\n", 33 | "# y = mx + b\n", 34 | "# m is slope, b is y-intercept\n", 35 | "def compute_error_for_line_given_points(b, m, points):\n", 36 | " totalError = 0\n", 37 | " for i in range(0, len(points)):\n", 38 | " x = points[i, 0]\n", 39 | " y = points[i, 1]\n", 40 | " totalError += (y - (m * x + b)) ** 2\n", 41 | " return totalError / float(len(points))\n", 42 | "\n", 43 | "def step_gradient(b_current, m_current, points, learningRate):\n", 44 | " b_gradient = 0\n", 45 | " m_gradient = 0\n", 46 | " N = float(len(points))\n", 47 | " for i in range(0, len(points)):\n", 48 | " x = points[i, 0]\n", 49 | " y = points[i, 1]\n", 50 | " b_gradient += -(2/N) * (y - ((m_current * x) + b_current))\n", 51 | " m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))\n", 52 | " new_b = b_current - (learningRate * b_gradient)\n", 53 | " new_m = m_current - (learningRate * m_gradient)\n", 54 | " return [new_b, new_m]\n", 55 | "\n", 56 | "def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):\n", 57 | " b = starting_b\n", 58 | " m = starting_m\n", 59 | " for i in range(num_iterations):\n", 60 | " b, m = step_gradient(b, m, array(points), learning_rate)\n", 61 | " return [b, m]\n", 62 | "\n", 63 | "def run():\n", 64 | " points = genfromtxt(\"data.csv\", delimiter=\",\")\n", 65 | " learning_rate = 0.0001\n", 66 | " initial_b = 0 # initial y-intercept guess\n", 67 | " initial_m = 0 # initial slope guess\n", 68 | " num_iterations = 10000\n", 69 | " print(\"Starting gradient descent at b = {0}, m = {1}, error = {2}\".format(initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))\n", 70 | " print(\"Running...\")\n", 71 | " [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)\n", 72 | " print(\"After {0} iterations b = {1}, m = {2}, error = {3}\".format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))\n", 73 | "\n", 74 | "if __name__ == '__main__':\n", 75 | " run()" 76 | ] 77 | } 78 | ], 79 | "metadata": { 80 | "anaconda-cloud": {}, 81 | "kernelspec": { 82 | "display_name": "Python [conda root]", 83 | "language": "python", 84 | "name": "conda-root-py" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.5.2" 97 | } 98 | }, 99 | "nbformat": 4, 100 | "nbformat_minor": 1 101 | } 102 | -------------------------------------------------------------------------------- /Math_Stat/ML_Basic_Siraj Raval.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Intro_to_Math_of_Intelligence" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": { 14 | "collapsed": false, 15 | "scrolled": true 16 | }, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "Starting gradient descent at b = 0, m = 0, error = 5565.107834483211\n", 23 | "Running...\n", 24 | "After 10000 iterations b = 0.6078985997054931, m = 1.4675440436333027, error = 112.31533427075733\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "#The optimal values of m and b can be actually 
calculated with way less effort than doing a linear regression. \n", 30 | "#this is just to demonstrate gradient descent\n", 31 | "\n", 32 | "from numpy import *\n", 33 | "\n", 34 | "# y = mx + b\n", 35 | "# m is slope, b is y-intercept\n", 36 | "def compute_error_for_line_given_points(b, m, points):\n", 37 | " totalError = 0\n", 38 | " for i in range(0, len(points)):\n", 39 | " x = points[i, 0]\n", 40 | " y = points[i, 1]\n", 41 | " totalError += (y - (m * x + b)) ** 2\n", 42 | " return totalError / float(len(points))\n", 43 | "\n", 44 | "def step_gradient(b_current, m_current, points, learningRate):\n", 45 | " b_gradient = 0\n", 46 | " m_gradient = 0\n", 47 | " N = float(len(points))\n", 48 | " for i in range(0, len(points)):\n", 49 | " x = points[i, 0]\n", 50 | " y = points[i, 1]\n", 51 | " b_gradient += -(2/N) * (y - ((m_current * x) + b_current))\n", 52 | " m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))\n", 53 | " new_b = b_current - (learningRate * b_gradient)\n", 54 | " new_m = m_current - (learningRate * m_gradient)\n", 55 | " return [new_b, new_m]\n", 56 | "\n", 57 | "def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):\n", 58 | " b = starting_b\n", 59 | " m = starting_m\n", 60 | " for i in range(num_iterations):\n", 61 | " b, m = step_gradient(b, m, array(points), learning_rate)\n", 62 | " return [b, m]\n", 63 | "\n", 64 | "def run():\n", 65 | " points = genfromtxt(\"data.csv\", delimiter=\",\")\n", 66 | " learning_rate = 0.0001\n", 67 | " initial_b = 0 # initial y-intercept guess\n", 68 | " initial_m = 0 # initial slope guess\n", 69 | " num_iterations = 10000\n", 70 | " print(\"Starting gradient descent at b = {0}, m = {1}, error = {2}\".format(initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))\n", 71 | " print(\"Running...\")\n", 72 | " [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)\n", 73 | " print(\"After {0} iterations b = {1}, m = {2}, error = {3}\".format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))\n", 74 | "\n", 75 | "if __name__ == '__main__':\n", 76 | " run()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [] 87 | } 88 | ], 89 | "metadata": { 90 | "anaconda-cloud": {}, 91 | "kernelspec": { 92 | "display_name": "Python [conda root]", 93 | "language": "python", 94 | "name": "conda-root-py" 95 | }, 96 | "language_info": { 97 | "codemirror_mode": { 98 | "name": "ipython", 99 | "version": 3 100 | }, 101 | "file_extension": ".py", 102 | "mimetype": "text/x-python", 103 | "name": "python", 104 | "nbconvert_exporter": "python", 105 | "pygments_lexer": "ipython3", 106 | "version": "3.5.2" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 1 111 | } 112 | -------------------------------------------------------------------------------- /Math_Stat/Readme.md: -------------------------------------------------------------------------------- 1 | # Math&Stat Basic 2 | 3 | # *To-Do* 4 | 5 | ## Math & Stat 6 | 7 | * [Fundamentals of Engineering Exam Review](https://www.coursera.org/learn/fe-exam/home/welcome) - Week2 & Week3 (Due July 2017) 8 | 9 | * [Data Science Math Skills](https://www.coursera.org/learn/datasciencemathskills/home/welcome) - (Due August 2017) 10 | 11 | * [Bayesian Statistics: From Concept to Data Analysis](https://www.coursera.org/learn/bayesian-statistics/home/welcome) - (Due 
2017) 12 | 13 | ## Python Skills for Data (One Course per week) 14 | 15 | * Introduction to Data Visualization with Python 16 | * pandas Foundation 17 | * Maniplulating DataFrames with pandas 18 | * Merging DataFrames with pandas 19 | * Statistical Thinking in Python 1 & 2 20 | * Introduction to Databases in Python 21 | * Supervised & Unsupervised Learning in Python 22 | * Intermediate Python for Data Science 23 | 24 | 25 | 26 | # *참고자료* 27 | 28 | ## Data Preprocessing 29 | - [Chris ALBon: Python/R](https://ch/home/ryan/nlp_ryan/Readme.mdrisalbon.com/) 30 | -------------------------------------------------------------------------------- /Math_Stat/data.csv: -------------------------------------------------------------------------------- 1 | 32.502345269453031,31.70700584656992 2 | 53.426804033275019,68.77759598163891 3 | 61.530358025636438,62.562382297945803 4 | 47.475639634786098,71.546632233567777 5 | 59.813207869512318,87.230925133687393 6 | 55.142188413943821,78.211518270799232 7 | 52.211796692214001,79.64197304980874 8 | 39.299566694317065,59.171489321869508 9 | 48.10504169176825,75.331242297063056 10 | 52.550014442733818,71.300879886850353 11 | 45.419730144973755,55.165677145959123 12 | 54.351634881228918,82.478846757497919 13 | 44.164049496773352,62.008923245725825 14 | 58.16847071685779,75.392870425994957 15 | 56.727208057096611,81.43619215887864 16 | 48.955888566093719,60.723602440673965 17 | 44.687196231480904,82.892503731453715 18 | 60.297326851333466,97.379896862166078 19 | 45.618643772955828,48.847153317355072 20 | 38.816817537445637,56.877213186268506 21 | 66.189816606752601,83.878564664602763 22 | 65.41605174513407,118.59121730252249 23 | 47.48120860786787,57.251819462268969 24 | 41.57564261748702,51.391744079832307 25 | 51.84518690563943,75.380651665312357 26 | 59.370822011089523,74.765564032151374 27 | 57.31000343834809,95.455052922574737 28 | 63.615561251453308,95.229366017555307 29 | 46.737619407976972,79.052406169565586 30 | 50.556760148547767,83.432071421323712 31 | 52.223996085553047,63.358790317497878 32 | 35.567830047746632,41.412885303700563 33 | 42.436476944055642,76.617341280074044 34 | 58.16454011019286,96.769566426108199 35 | 57.504447615341789,74.084130116602523 36 | 45.440530725319981,66.588144414228594 37 | 61.89622268029126,77.768482417793024 38 | 33.093831736163963,50.719588912312084 39 | 36.436009511386871,62.124570818071781 40 | 37.675654860850742,60.810246649902211 41 | 44.555608383275356,52.682983366387781 42 | 43.318282631865721,58.569824717692867 43 | 50.073145632289034,82.905981485070512 44 | 43.870612645218372,61.424709804339123 45 | 62.997480747553091,115.24415280079529 46 | 32.669043763467187,45.570588823376085 47 | 40.166899008703702,54.084054796223612 48 | 53.575077531673656,87.994452758110413 49 | 33.864214971778239,52.725494375900425 50 | 64.707138666121296,93.576118692658241 51 | 38.119824026822805,80.166275447370964 52 | 44.502538064645101,65.101711570560326 53 | 40.599538384552318,65.562301260400375 54 | 41.720676356341293,65.280886920822823 55 | 51.088634678336796,73.434641546324301 56 | 55.078095904923202,71.13972785861894 57 | 41.377726534895203,79.102829683549857 58 | 62.494697427269791,86.520538440347153 59 | 49.203887540826003,84.742697807826218 60 | 41.102685187349664,59.358850248624933 61 | 41.182016105169822,61.684037524833627 62 | 50.186389494880601,69.847604158249183 63 | 52.378446219236217,86.098291205774103 64 | 50.135485486286122,59.108839267699643 65 | 33.644706006191782,69.89968164362763 66 | 
39.557901222906828,44.862490711164398 67 | 56.130388816875467,85.498067778840223 68 | 57.362052133238237,95.536686846467219 69 | 60.269214393997906,70.251934419771587 70 | 35.678093889410732,52.721734964774988 71 | 31.588116998132829,50.392670135079896 72 | 53.66093226167304,63.642398775657753 73 | 46.682228649471917,72.247251068662365 74 | 43.107820219102464,57.812512976181402 75 | 70.34607561504933,104.25710158543822 76 | 44.492855880854073,86.642020318822006 77 | 57.50453330326841,91.486778000110135 78 | 36.930076609191808,55.231660886212836 79 | 55.805733357942742,79.550436678507609 80 | 38.954769073377065,44.847124242467601 81 | 56.901214702247074,80.207523139682763 82 | 56.868900661384046,83.14274979204346 83 | 34.33312470421609,55.723489260543914 84 | 59.04974121466681,77.634182511677864 85 | 57.788223993230673,99.051414841748269 86 | 54.282328705967409,79.120646274680027 87 | 51.088719898979143,69.588897851118475 88 | 50.282836348230731,69.510503311494389 89 | 44.211741752090113,73.687564318317285 90 | 38.005488008060688,61.366904537240131 91 | 32.940479942618296,67.170655768995118 92 | 53.691639571070056,85.668203145001542 93 | 68.76573426962166,114.85387123391394 94 | 46.230966498310252,90.123572069967423 95 | 68.319360818255362,97.919821035242848 96 | 50.030174340312143,81.536990783015028 97 | 49.239765342753763,72.111832469615663 98 | 50.039575939875988,85.232007342325673 99 | 48.149858891028863,66.224957888054632 100 | 25.128484647772304,53.454394214850524 101 | -------------------------------------------------------------------------------- /Natural Language Generation/lstm_keras_generation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Jun 26 21:23:43 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | '''Example script to generate text from Nietzsche's writings. 10 | At least 20 epochs are required before the generated text 11 | starts sounding coherent. 12 | It is recommended to run this script on GPU, as recurrent 13 | networks are quite computationally intensive. 14 | If you try this script on new data, make sure your corpus 15 | has at least ~100k characters. ~1M is better. 
16 | ''' 17 | 18 | from __future__ import print_function 19 | from keras.models import Sequential 20 | from keras.layers import Dense, Activation 21 | from keras.layers import LSTM 22 | from keras.optimizers import RMSprop 23 | from keras.utils.data_utils import get_file 24 | import numpy as np 25 | import random 26 | import sys 27 | 28 | path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt') 29 | text = open(path).read().lower() 30 | print('corpus length:', len(text)) 31 | 32 | chars = sorted(list(set(text))) 33 | print('total chars:', len(chars)) 34 | char_indices = dict((c, i) for i, c in enumerate(chars)) 35 | indices_char = dict((i, c) for i, c in enumerate(chars)) 36 | 37 | # cut the text in semi-redundant sequences of maxlen characters 38 | maxlen = 40 39 | step = 3 40 | sentences = [] 41 | next_chars = [] 42 | for i in range(0, len(text) - maxlen, step): 43 | sentences.append(text[i: i + maxlen]) 44 | next_chars.append(text[i + maxlen]) 45 | print('nb sequences:', len(sentences)) 46 | 47 | print('Vectorization...') 48 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 49 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 50 | for i, sentence in enumerate(sentences): 51 | for t, char in enumerate(sentence): 52 | X[i, t, char_indices[char]] = 1 53 | y[i, char_indices[next_chars[i]]] = 1 54 | 55 | 56 | # build the model: a single LSTM 57 | print('Build model...') 58 | model = Sequential() 59 | model.add(LSTM(128, input_shape=(maxlen, len(chars)))) 60 | model.add(Dense(len(chars))) 61 | model.add(Activation('softmax')) 62 | 63 | optimizer = RMSprop(lr=0.01) 64 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 65 | 66 | 67 | def sample(preds, temperature=1.0): 68 | # helper function to sample an index from a probability array 69 | preds = np.asarray(preds).astype('float64') 70 | preds = np.log(preds) / temperature 71 | exp_preds = np.exp(preds) 72 | preds = exp_preds / np.sum(exp_preds) 73 | probas = np.random.multinomial(1, preds, 1) 74 | return np.argmax(probas) 75 | 76 | # train the model, output generated text after each iteration 77 | for iteration in range(1, 60): 78 | print() 79 | print('-' * 50) 80 | print('Iteration', iteration) 81 | model.fit(X, y, 82 | batch_size=128, 83 | epochs=1) 84 | 85 | start_index = random.randint(0, len(text) - maxlen - 1) 86 | 87 | for diversity in [0.2, 0.5, 1.0, 1.2]: 88 | print() 89 | print('----- diversity:', diversity) 90 | 91 | generated = '' 92 | sentence = text[start_index: start_index + maxlen] 93 | generated += sentence 94 | print('----- Generating with seed: "' + sentence + '"') 95 | sys.stdout.write(generated) 96 | 97 | for i in range(400): 98 | x = np.zeros((1, maxlen, len(chars))) 99 | for t, char in enumerate(sentence): 100 | x[0, t, char_indices[char]] = 1. 101 | 102 | preds = model.predict(x, verbose=0)[0] 103 | next_index = sample(preds, diversity) 104 | next_char = indices_char[next_index] 105 | 106 | generated += next_char 107 | sentence = sentence[1:] + next_char 108 | 109 | sys.stdout.write(next_char) 110 | sys.stdout.flush() 111 | print() -------------------------------------------------------------------------------- /Python/.ipynb_checkpoints/Python_Review-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 7. 
함수 이해하기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "def func(pa1, pa2):\n", 19 | " \"\"\"함수란 이런것이다.\n", 20 | " \n", 21 | " \"\"\"\n", 22 | " pa1, pa2 = pa2, pa1\n", 23 | " \n", 24 | " return pa1, pa2" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": { 31 | "collapsed": false, 32 | "scrolled": true 33 | }, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "Help on function func in module __main__:\n", 40 | "\n", 41 | "func(pa1, pa2)\n", 42 | " 함수란 이런것이다.\n", 43 | "\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "help(func)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "source": [ 57 | "# 수정범위\n", 58 | "\n", 59 | "P6. 일급객체\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [ 69 | { 70 | "name": "stdout", 71 | "output_type": "stream", 72 | "text": [ 73 | "{'count': 0}\n", 74 | "call count 1\n", 75 | "20\n", 76 | "call count 2\n", 77 | "22\n", 78 | "{'count': 2}\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "def add(x,y):\n", 84 | " add.count += 1\n", 85 | " print(\"call count\" , add.count)\n", 86 | " return x+y\n", 87 | "\n", 88 | "add.count = 0\n", 89 | "print(add.__dict__)\n", 90 | "print(add(10,10))\n", 91 | "print(add(11,11))\n", 92 | "print(add.__dict__)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 7, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "\n", 107 | "\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "def add(x,y):\n", 113 | " return x+y\n", 114 | "\n", 115 | "print(globals()[\"add\"])\n", 116 | "print(add)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 8, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "10\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "def func(func, x, y):\n", 136 | " return func(x, y)\n", 137 | "\n", 138 | "print(func(add,5,5))" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 10, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "dahl\n", 153 | "dahl\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "#함수에서 클레스 접근 예시\n", 159 | "class A:\n", 160 | " name = \"dahl\"\n", 161 | " \n", 162 | "def getName():\n", 163 | " return A.name\n", 164 | "\n", 165 | "print(getName())\n", 166 | "\n", 167 | "#함수에서 인스턴스 접근 예시\n", 168 | "#instance = 변수 in class\n", 169 | "class Person:\n", 170 | " def __init__(self,name):\n", 171 | " self.name = name\n", 172 | " \n", 173 | "def func(obj):\n", 174 | " return obj.name\n", 175 | "\n", 176 | "p = Person(\"dahl\")\n", 177 | "print(func(p))" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 13, 183 | "metadata": { 184 | "collapsed": false 185 | }, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "10\n", 192 | "20\n" 193 | ] 194 | } 195 | ], 196 | "source": [ 197 | "#익명함수\n", 198 | "fn = lambda x : x\n", 199 | "print(fn(10))\n", 200 | "\n", 201 | "#익명 함수도 객체임\n", 202 | "\n", 
203 | "#익명 함수에서 함수 적용\n", 204 | "fn1 = lambda x : add(x,x)\n", 205 | "print(fn1(10))\n", 206 | "\n", 207 | "#익명함수 내의 파라메터 초기값 처리\n", 208 | "x = 20\n", 209 | "lam = lambda x=x : list(x+n for n in range(3))" 210 | ] 211 | } 212 | ], 213 | "metadata": { 214 | "anaconda-cloud": {}, 215 | "kernelspec": { 216 | "display_name": "Python [conda root]", 217 | "language": "python", 218 | "name": "conda-root-py" 219 | }, 220 | "language_info": { 221 | "codemirror_mode": { 222 | "name": "ipython", 223 | "version": 3 224 | }, 225 | "file_extension": ".py", 226 | "mimetype": "text/x-python", 227 | "name": "python", 228 | "nbconvert_exporter": "python", 229 | "pygments_lexer": "ipython3", 230 | "version": "3.5.2" 231 | } 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 1 235 | } 236 | -------------------------------------------------------------------------------- /Python/Cheat_Sheet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 11 19:26:51 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | #-----------------Sklearn-------------------- 10 | #1. Divide train and test data 11 | from sklearn.model_selection import train_test_split 12 | x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42) 13 | 14 | #---------------Keras---------------- 15 | 16 | #Create the plot 17 | import matplotlib.pyplot as plt 18 | plt.plot(model['acc'], 'r') 19 | plt.xlabel('Epochs') 20 | plt.ylabel('acc') 21 | plt.show() 22 | 23 | #Save Model 24 | from keras.models import load_model 25 | model.save('domain_classify.h5') 26 | 27 | #Load Model 28 | my_model = load_model('domain_classify.h5') 29 | 30 | #Use Model (Make sure input as same dim.) 
31 | my_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 32 | my_model.predict_classes(np.array(sent)) 33 | 34 | 35 | -------------------------------------------------------------------------------- /Python/Decorator.py: -------------------------------------------------------------------------------- 1 | # November 20, Unit 44: Decorators 2 | #https://dojang.io/mod/page/view.php?id=1131 3 | 4 | # A decorator is used to add functionality to a function without modifying the function itself 5 | 6 | class Calc: 7 | @staticmethod 8 | def add(a,b): 9 | print(a,b) 10 | 11 | # A decorator that prints the start and end of a function call 12 | def trace(func): 13 | def wrapper(): 14 | print(func.__name__, '함수 시작') 15 | func() 16 | print(func.__name__, '함수 끝') 17 | return wrapper 18 | 19 | @trace 20 | def hello(): 21 | print('hello') 22 | 23 | @trace 24 | def world(): 25 | print('world') 26 | 27 | # trace_hello = trace(hello) # pass the function to decorate into the decorator 28 | # trace_hello() # call the returned wrapper function 29 | # trace_world = trace(world) 30 | # trace_world() 31 | 32 | hello() 33 | world() -------------------------------------------------------------------------------- /Python/Visualization/.ipynb_checkpoints/Bokeh-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /Python/Visualization/Bokeh.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [] 11 | } 12 | ], 13 | "metadata": { 14 | "kernelspec": { 15 | "display_name": "Python 3", 16 | "language": "python", 17 | "name": "python3" 18 | }, 19 | "language_info": { 20 | "codemirror_mode": { 21 | "name": "ipython", 22 | "version": 3 23 | }, 24 | "file_extension": ".py", 25 | "mimetype": "text/x-python", 26 | "name": "python", 27 | "nbconvert_exporter": "python", 28 | "pygments_lexer": "ipython3", 29 | "version": "3.6.1" 30 | } 31 | }, 32 | "nbformat": 4, 33 | "nbformat_minor": 2 34 | } 35 | -------------------------------------------------------------------------------- /Python/attribute.py: -------------------------------------------------------------------------------- 1 | class Person: 2 | def __init__(self): 3 | self.hello = '안녕하세요.' 4 | 5 | def greeting(self): 6 | print(self.hello) 7 | 8 | james = Person() 9 | james.greeting() # 안녕하세요. -------------------------------------------------------------------------------- /Quora_insincere/.gitignore: -------------------------------------------------------------------------------- 1 | /2_NLP_Study 2 | .DS_Store 3 | .ipynb_checkpoints/ 4 | data_in/ 5 | sh/ 6 | input/ 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | 113 | .vscode 114 | .ipynb_checkpoints 115 | 116 | *.voc 117 | checkPoint 118 | *.log 119 | 120 | OLD/ -------------------------------------------------------------------------------- /Quora_insincere/README.md: -------------------------------------------------------------------------------- 1 | Kaggle 2 | 3 | https://www.kaggle.com/c/quora-insincere-questions-classification/data -------------------------------------------------------------------------------- /Tensorflow/.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### JetBrains template 3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 5 | 6 | # User-specific stuff: 7 | .idea/**/workspace.xml 8 | .idea/**/tasks.xml 9 | .idea/dictionaries 10 | 11 | # Sensitive or high-churn files: 12 | .idea/**/dataSources/ 13 | .idea/**/dataSources.ids 14 | .idea/**/dataSources.xml 15 | .idea/**/dataSources.local.xml 16 | .idea/**/sqlDataSources.xml 17 | .idea/**/dynamic.xml 18 | .idea/**/uiDesigner.xml 19 | 20 | # Gradle: 21 | .idea/**/gradle.xml 22 | .idea/**/libraries 23 | 24 | # CMake 25 | cmake-build-debug/ 26 | 27 | # Mongo Explorer plugin: 28 | .idea/**/mongoSettings.xml 29 | 30 | ## File-based project format: 31 | *.iws 32 | 33 | ## Plugin-specific files: 34 | 35 | # IntelliJ 36 | out/ 37 | 38 | # mpeltonen/sbt-idea plugin 39 | .idea_modules/ 40 | 41 | # JIRA plugin 42 | atlassian-ide-plugin.xml 43 | 44 | # Cursive Clojure plugin 45 | .idea/replstate.xml 46 | 47 | # Crashlytics plugin (for Android Studio and IntelliJ) 48 | com_crashlytics_export_strings.xml 49 | crashlytics.properties 50 | crashlytics-build.properties 51 | fabric.properties 52 | ### Python template 53 | # Byte-compiled / optimized / DLL files 54 | __pycache__/ 55 | *.py[cod] 56 | *$py.class 57 | 58 | # C extensions 59 | *.so 60 | 61 | # Distribution / packaging 62 | .Python 63 | build/ 64 | develop-eggs/ 65 | dist/ 66 | downloads/ 67 | eggs/ 68 | .eggs/ 69 | lib/ 70 | lib64/ 71 | parts/ 72 | sdist/ 73 | var/ 74 | wheels/ 75 | *.egg-info/ 76 | .installed.cfg 77 | *.egg 78 | MANIFEST 79 | 80 | # PyInstaller 81 | # Usually these files are written by a python script from a template 82 | # before PyInstaller builds 
the exe, so as to inject date/other infos into it. 83 | *.manifest 84 | *.spec 85 | 86 | # Installer logs 87 | pip-log.txt 88 | pip-delete-this-directory.txt 89 | 90 | # Unit test / coverage reports 91 | htmlcov/ 92 | .tox/ 93 | .coverage 94 | .coverage.* 95 | .cache 96 | nosetests.xml 97 | coverage.xml 98 | *.cover 99 | .hypothesis/ 100 | 101 | # Translations 102 | *.mo 103 | *.pot 104 | 105 | # Django stuff: 106 | *.log 107 | .static_storage/ 108 | .media/ 109 | local_settings.py 110 | 111 | # Flask stuff: 112 | instance/ 113 | .webassets-cache 114 | 115 | # Scrapy stuff: 116 | .scrapy 117 | 118 | # Sphinx documentation 119 | docs/_build/ 120 | 121 | # PyBuilder 122 | target/ 123 | 124 | # Jupyter Notebook 125 | .ipynb_checkpoints 126 | 127 | # pyenv 128 | .python-version 129 | 130 | # celery beat schedule file 131 | celerybeat-schedule 132 | 133 | # SageMath parsed files 134 | *.sage.py 135 | 136 | # Environments 137 | .env 138 | .venv 139 | env/ 140 | venv/ 141 | ENV/ 142 | env.bak/ 143 | venv.bak/ 144 | 145 | # Spyder project settings 146 | .spyderproject 147 | .spyproject 148 | 149 | # Rope project settings 150 | .ropeproject 151 | 152 | # mkdocs documentation 153 | /site 154 | 155 | # mypy 156 | .mypy_cache/ 157 | ### macOS template 158 | # General 159 | .DS_Store 160 | .AppleDouble 161 | .LSOverride 162 | 163 | # Icon must end with two \r 164 | Icon 165 | 166 | # Thumbnails 167 | ._* 168 | 169 | # Files that might appear in the root of a volume 170 | .DocumentRevisions-V100 171 | .fseventsd 172 | .Spotlight-V100 173 | .TemporaryItems 174 | .Trashes 175 | .VolumeIcon.icns 176 | .com.apple.timemachine.donotpresent 177 | 178 | # Directories potentially created on remote AFP share 179 | .AppleDB 180 | .AppleDesktop 181 | Network Trash Folder 182 | Temporary Items 183 | .apdisk 184 | 185 | .idea/ 186 | data_out/* 187 | checkpoint/ 188 | logs/ 189 | OLD/ 190 | practice/ 191 | scala_data_pre/ 192 | target/ 193 | .vscode/ 194 | .ipynb_checkpoints/ 195 | .DS_Store 196 | .DS_Store* 197 | my_test_model/ 198 | result/ 199 | sh/ -------------------------------------------------------------------------------- /Tensorflow/04_word2vec_eager.py: -------------------------------------------------------------------------------- 1 | """ starter code for word2vec skip-gram model with NCE loss 2 | Eager execution 3 | CS 20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Chip Huyen (chiphuyen@cs.stanford.edu) & Akshay Agrawal (akshayka@cs.stanford.edu) 6 | Lecture 04 7 | """ 8 | 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | import tensorflow.contrib.eager as tfe 15 | 16 | import utils 17 | import word2vec_utils 18 | 19 | tfe.enable_eager_execution() 20 | 21 | # Model hyperparameters 22 | VOCAB_SIZE = 50000 23 | BATCH_SIZE = 128 24 | EMBED_SIZE = 128 # dimension of the word embedding vectors 25 | SKIP_WINDOW = 1 # the context window 26 | NUM_SAMPLED = 64 # number of negative examples to sample 27 | LEARNING_RATE = 1.0 28 | NUM_TRAIN_STEPS = 100000 29 | VISUAL_FLD = 'visualization' 30 | SKIP_STEP = 5000 31 | 32 | # Parameters for downloading data 33 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip' 34 | EXPECTED_BYTES = 31344016 35 | 36 | class Word2Vec(object): 37 | def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED): 38 | self.vocab_size = vocab_size 39 | self.num_sampled = num_sampled 40 | self.embed_matrix = tfe.Variable(tf.random_uniform( 41 | [vocab_size, embed_size])) 42 | 
self.nce_weight = tfe.Variable(tf.truncated_normal( 43 | [vocab_size, embed_size], 44 | stddev=1.0 / (embed_size ** 0.5))) 45 | self.nce_bias = tfe.Variable(tf.zeros([vocab_size])) 46 | 47 | def compute_loss(self, center_words, target_words): 48 | """Computes the forward pass of word2vec with the NCE loss.""" 49 | embed = tf.nn.embedding_lookup(self.embed_matrix, center_words) 50 | loss = tf.reduce_mean(tf.nn.nce_loss(weights=self.nce_weight, 51 | biases=self.nce_bias, 52 | labels=target_words, 53 | inputs=embed, 54 | num_sampled=self.num_sampled, 55 | num_classes=self.vocab_size)) 56 | return loss 57 | 58 | 59 | def gen(): 60 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES, 61 | VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW, 62 | VISUAL_FLD) 63 | 64 | def main(): 65 | dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32), 66 | (tf.TensorShape([BATCH_SIZE]), 67 | tf.TensorShape([BATCH_SIZE, 1]))) 68 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE) 69 | model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE) 70 | grad_fn = tfe.implicit_value_and_gradients(model.compute_loss) 71 | total_loss = 0.0 # for average loss in the last SKIP_STEP steps 72 | num_train_steps = 0 73 | while num_train_steps < NUM_TRAIN_STEPS: 74 | for center_words, target_words in tfe.Iterator(dataset): 75 | if num_train_steps >= NUM_TRAIN_STEPS: 76 | break 77 | loss_batch, grads = grad_fn(center_words, target_words) 78 | total_loss += loss_batch 79 | optimizer.apply_gradients(grads) 80 | if (num_train_steps + 1) % SKIP_STEP == 0: 81 | print('Average loss at step {}: {:5.1f}'.format( 82 | num_train_steps, total_loss / SKIP_STEP)) 83 | total_loss = 0.0 84 | num_train_steps += 1 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /Tensorflow/TF_README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning NLP Tutorial from Scratch 2 | 3 | *Feedback: sungjin7127@gmail.com* 4 | 5 | # *To-Do* 6 | 7 | ##Tensorflow Tutorial (1.4 or Later) 8 | 9 | * [Conversation-Tensorflow](https://github.com/DoungjunLee/conversation-tensorflow) 10 | * [Hvass Tensorflow Tutorial](https://github.com/Hvass-Labs/TensorFlow-Tutorials) 11 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/02_lazy_loading.py: -------------------------------------------------------------------------------- 1 | """ Example of lazy vs normal loading 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 02 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | 10 | import tensorflow as tf 11 | 12 | ######################################## 13 | ## NORMAL LOADING ## 14 | ## print out a graph with 1 Add node ## 15 | ######################################## 16 | 17 | x = tf.Variable(10, name='x') 18 | y = tf.Variable(20, name='y') 19 | z = tf.add(x, y) 20 | 21 | with tf.Session() as sess: 22 | sess.run(tf.global_variables_initializer()) 23 | writer = tf.summary.FileWriter('graphs/normal_loading', sess.graph) 24 | for _ in range(10): 25 | sess.run(z) 26 | print(tf.get_default_graph().as_graph_def()) 27 | writer.close() 28 | 29 | ######################################## 30 | ## LAZY LOADING ## 31 | ## print out a graph with 10 Add nodes## 32 | ######################################## 33 | 34 | x = tf.Variable(10, name='x') 35 | y = 
tf.Variable(20, name='y') 36 | 37 | with tf.Session() as sess: 38 | sess.run(tf.global_variables_initializer()) 39 | writer = tf.summary.FileWriter('graphs/lazy_loading', sess.graph) 40 | for _ in range(10): 41 | sess.run(tf.add(x, y)) 42 | print(tf.get_default_graph().as_graph_def()) 43 | writer.close() -------------------------------------------------------------------------------- /Tensorflow/standford_example/02_placeholder.py: -------------------------------------------------------------------------------- 1 | """ Placeholder and feed_dict example 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 02 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | 10 | import tensorflow as tf 11 | 12 | # Example 1: feed_dict with placeholder 13 | 14 | # a is a placeholderfor a vector of 3 elements, type tf.float32 15 | a = tf.placeholder(tf.float32, shape=[3]) 16 | b = tf.constant([5, 5, 5], tf.float32) 17 | 18 | # use the placeholder as you would a constant 19 | c = a + b # short for tf.add(a, b) 20 | 21 | writer = tf.summary.FileWriter('graphs/placeholders', tf.get_default_graph()) 22 | with tf.Session() as sess: 23 | # compute the value of c given the value of a is [1, 2, 3] 24 | print(sess.run(c, {a: [1, 2, 3]})) # [6. 7. 8.] 25 | writer.close() 26 | 27 | 28 | # Example 2: feed_dict with variables 29 | a = tf.add(2, 5) 30 | b = tf.multiply(a, 3) 31 | 32 | with tf.Session() as sess: 33 | print(sess.run(b)) # >> 21 34 | # compute the value of b given the value of a is 15 35 | print(sess.run(b, feed_dict={a: 15})) # >> 45 -------------------------------------------------------------------------------- /Tensorflow/standford_example/02_simple_tf.py: -------------------------------------------------------------------------------- 1 | """ Simple TensorFlow's ops 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | """ 6 | import os 7 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | # Example 1: Simple ways to create log file writer 13 | a = tf.constant(2, name='a') 14 | b = tf.constant(3, name='b') 15 | x = tf.add(a, b, name='add') 16 | writer = tf.summary.FileWriter('./graphs/simple', tf.get_default_graph()) 17 | with tf.Session() as sess: 18 | # writer = tf.summary.FileWriter('./graphs', sess.graph) 19 | print(sess.run(x)) 20 | writer.close() # close the writer when you’re done using it 21 | 22 | # Example 2: The wonderful wizard of div 23 | a = tf.constant([2, 2], name='a') 24 | b = tf.constant([[0, 1], [2, 3]], name='b') 25 | 26 | with tf.Session() as sess: 27 | print(sess.run(tf.div(b, a))) 28 | print(sess.run(tf.divide(b, a))) 29 | print(sess.run(tf.truediv(b, a))) 30 | print(sess.run(tf.floordiv(b, a))) 31 | # print(sess.run(tf.realdiv(b, a))) 32 | print(sess.run(tf.truncatediv(b, a))) 33 | print(sess.run(tf.floor_div(b, a))) 34 | 35 | # Example 3: multiplying tensors 36 | a = tf.constant([10, 20], name='a') 37 | b = tf.constant([2, 3], name='b') 38 | 39 | with tf.Session() as sess: 40 | print(sess.run(tf.multiply(a, b))) 41 | print(sess.run(tf.tensordot(a, b, 1))) 42 | 43 | # Example 4: Python native type 44 | t_0 = 19 45 | x = tf.zeros_like(t_0) # ==> 0 46 | y = tf.ones_like(t_0) # ==> 1 47 | 48 | t_1 = ['apple', 'peach', 'banana'] 49 | x = tf.zeros_like(t_1) # ==> ['' '' ''] 50 | # y = tf.ones_like(t_1) # ==> TypeError: Expected string, got 1 of type 'int' 
instead. 51 | 52 | t_2 = [[True, False, False], 53 | [False, False, True], 54 | [False, True, False]] 55 | x = tf.zeros_like(t_2) # ==> 3x3 tensor, all elements are False 56 | y = tf.ones_like(t_2) # ==> 3x3 tensor, all elements are True 57 | 58 | print(tf.int32.as_numpy_dtype()) 59 | 60 | # Example 5: printing your graph's definition 61 | my_const = tf.constant([1.0, 2.0], name='my_const') 62 | print(tf.get_default_graph().as_graph_def()) -------------------------------------------------------------------------------- /Tensorflow/standford_example/02_variables.py: -------------------------------------------------------------------------------- 1 | """ Variable exmaples 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 02 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | 10 | import numpy as np 11 | import tensorflow as tf 12 | 13 | # Example 1: creating variables 14 | s = tf.Variable(2, name='scalar') 15 | m = tf.Variable([[0, 1], [2, 3]], name='matrix') 16 | W = tf.Variable(tf.zeros([784,10]), name='big_matrix') 17 | V = tf.Variable(tf.truncated_normal([784, 10]), name='normal_matrix') 18 | 19 | s = tf.get_variable('scalar', initializer=tf.constant(2)) 20 | m = tf.get_variable('matrix', initializer=tf.constant([[0, 1], [2, 3]])) 21 | W = tf.get_variable('big_matrix', shape=(784, 10), initializer=tf.zeros_initializer()) 22 | V = tf.get_variable('normal_matrix', shape=(784, 10), initializer=tf.truncated_normal_initializer()) 23 | 24 | with tf.Session() as sess: 25 | sess.run(tf.global_variables_initializer()) 26 | print(V.eval()) 27 | 28 | # Example 2: assigning values to variables 29 | W = tf.Variable(10) 30 | W.assign(100) 31 | with tf.Session() as sess: 32 | sess.run(W.initializer) 33 | print(sess.run(W)) # >> 10 34 | 35 | W = tf.Variable(10) 36 | assign_op = W.assign(100) 37 | with tf.Session() as sess: 38 | sess.run(assign_op) 39 | print(W.eval()) # >> 100 40 | 41 | # create a variable whose original value is 2 42 | a = tf.get_variable('scalar', initializer=tf.constant(2)) 43 | a_times_two = a.assign(a * 2) 44 | with tf.Session() as sess: 45 | sess.run(tf.global_variables_initializer()) 46 | sess.run(a_times_two) # >> 4 47 | sess.run(a_times_two) # >> 8 48 | sess.run(a_times_two) # >> 16 49 | 50 | W = tf.Variable(10) 51 | with tf.Session() as sess: 52 | sess.run(W.initializer) 53 | print(sess.run(W.assign_add(10))) # >> 20 54 | print(sess.run(W.assign_sub(2))) # >> 18 55 | 56 | # Example 3: Each session has its own copy of variable 57 | W = tf.Variable(10) 58 | sess1 = tf.Session() 59 | sess2 = tf.Session() 60 | sess1.run(W.initializer) 61 | sess2.run(W.initializer) 62 | print(sess1.run(W.assign_add(10))) # >> 20 63 | print(sess2.run(W.assign_sub(2))) # >> 8 64 | print(sess1.run(W.assign_add(100))) # >> 120 65 | print(sess2.run(W.assign_sub(50))) # >> -42 66 | sess1.close() 67 | sess2.close() 68 | 69 | # Example 4: create a variable with the initial value depending on another variable 70 | W = tf.Variable(tf.truncated_normal([700, 10])) 71 | U = tf.Variable(W * 2) -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_linreg_dataset.py: -------------------------------------------------------------------------------- 1 | """ Solution for simple linear regression example using tf.data 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 03 
6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | import time 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | import tensorflow as tf 14 | 15 | import utils 16 | 17 | DATA_FILE = 'data/birth_life_2010.txt' 18 | 19 | # Step 1: read in the data 20 | data, n_samples = utils.read_birth_life_data(DATA_FILE) 21 | 22 | # Step 2: create Dataset and iterator 23 | dataset = tf.data.Dataset.from_tensor_slices((data[:,0], data[:,1])) 24 | 25 | iterator = dataset.make_initializable_iterator() 26 | X, Y = iterator.get_next() 27 | 28 | # Step 3: create weight and bias, initialized to 0 29 | w = tf.get_variable('weights', initializer=tf.constant(0.0)) 30 | b = tf.get_variable('bias', initializer=tf.constant(0.0)) 31 | 32 | # Step 4: build model to predict Y 33 | Y_predicted = X * w + b 34 | 35 | # Step 5: use the square error as the loss function 36 | loss = tf.square(Y - Y_predicted, name='loss') 37 | # loss = utils.huber_loss(Y, Y_predicted) 38 | 39 | # Step 6: using gradient descent with learning rate of 0.001 to minimize loss 40 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss) 41 | 42 | start = time.time() 43 | with tf.Session() as sess: 44 | # Step 7: initialize the necessary variables, in this case, w and b 45 | sess.run(tf.global_variables_initializer()) 46 | writer = tf.summary.FileWriter('./graphs/linear_reg', sess.graph) 47 | 48 | # Step 8: train the model for 100 epochs 49 | for i in range(100): 50 | sess.run(iterator.initializer) # initialize the iterator 51 | total_loss = 0 52 | try: 53 | while True: 54 | _, l = sess.run([optimizer, loss]) 55 | total_loss += l 56 | except tf.errors.OutOfRangeError: 57 | pass 58 | 59 | print('Epoch {0}: {1}'.format(i, total_loss/n_samples)) 60 | 61 | # close the writer when you're done using it 62 | writer.close() 63 | 64 | # Step 9: output the values of w and b 65 | w_out, b_out = sess.run([w, b]) 66 | print('w: %f, b: %f' %(w_out, b_out)) 67 | print('Took: %f seconds' %(time.time() - start)) 68 | 69 | # plot the results 70 | plt.plot(data[:,0], data[:,1], 'bo', label='Real data') 71 | plt.plot(data[:,0], data[:,0] * w_out + b_out, 'r', label='Predicted data with squared error') 72 | # plt.plot(data[:,0], data[:,0] * (-5.883589) + 85.124306, 'g', label='Predicted data with Huber loss') 73 | plt.legend() 74 | plt.show() -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_linreg_placeholder.py: -------------------------------------------------------------------------------- 1 | """ Solution for simple linear regression example using placeholders 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 03 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | import time 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | import tensorflow as tf 14 | 15 | import utils 16 | 17 | DATA_FILE = 'data/birth_life_2010.txt' 18 | 19 | # Step 1: read in data from the .txt file 20 | data, n_samples = utils.read_birth_life_data(DATA_FILE) 21 | 22 | # Step 2: create placeholders for X (birth rate) and Y (life expectancy) 23 | X = tf.placeholder(tf.float32, name='X') 24 | Y = tf.placeholder(tf.float32, name='Y') 25 | 26 | # Step 3: create weight and bias, initialized to 0 27 | w = tf.get_variable('weights', initializer=tf.constant(0.0)) 28 | b = tf.get_variable('bias', initializer=tf.constant(0.0)) 29 | 30 | # Step 4: build 
model to predict Y 31 | Y_predicted = w * X + b 32 | 33 | # Step 5: use the squared error as the loss function 34 | # you can use either mean squared error or Huber loss 35 | loss = tf.square(Y - Y_predicted, name='loss') 36 | # loss = utils.huber_loss(Y, Y_predicted) 37 | 38 | # Step 6: using gradient descent with learning rate of 0.001 to minimize loss 39 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss) 40 | 41 | 42 | start = time.time() 43 | writer = tf.summary.FileWriter('./graphs/linear_reg', tf.get_default_graph()) 44 | with tf.Session() as sess: 45 | # Step 7: initialize the necessary variables, in this case, w and b 46 | sess.run(tf.global_variables_initializer()) 47 | 48 | # Step 8: train the model for 100 epochs 49 | for i in range(100): 50 | total_loss = 0 51 | for x, y in data: 52 | # Session execute optimizer and fetch values of loss 53 | _, l = sess.run([optimizer, loss], feed_dict={X: x, Y:y}) 54 | total_loss += l 55 | print('Epoch {0}: {1}'.format(i, total_loss/n_samples)) 56 | 57 | # close the writer when you're done using it 58 | writer.close() 59 | 60 | # Step 9: output the values of w and b 61 | w_out, b_out = sess.run([w, b]) 62 | 63 | print('Took: %f seconds' %(time.time() - start)) 64 | 65 | # plot the results 66 | plt.plot(data[:,0], data[:,1], 'bo', label='Real data') 67 | plt.plot(data[:,0], data[:,0] * w_out + b_out, 'r', label='Predicted data') 68 | plt.legend() 69 | plt.show() -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_linreg_starter.py: -------------------------------------------------------------------------------- 1 | """ Starter code for simple linear regression example using placeholders 2 | Created by Chip Huyen (huyenn@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 03 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | import time 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | import tensorflow as tf 14 | 15 | import utils 16 | 17 | DATA_FILE = 'data/birth_life_2010.txt' 18 | 19 | # Step 1: read in data from the .txt file 20 | data, n_samples = utils.read_birth_life_data(DATA_FILE) 21 | 22 | # Step 2: create placeholders for X (birth rate) and Y (life expectancy) 23 | # Remember both X and Y are scalars with type float 24 | X, Y = None, None 25 | ############################# 26 | ########## TO DO ############ 27 | ############################# 28 | 29 | # Step 3: create weight and bias, initialized to 0.0 30 | # Make sure to use tf.get_variable 31 | w, b = None, None 32 | ############################# 33 | ########## TO DO ############ 34 | ############################# 35 | 36 | # Step 4: build model to predict Y 37 | # e.g. 
how would you derive at Y_predicted given X, w, and b 38 | Y_predicted = None 39 | ############################# 40 | ########## TO DO ############ 41 | ############################# 42 | 43 | # Step 5: use the square error as the loss function 44 | loss = None 45 | ############################# 46 | ########## TO DO ############ 47 | ############################# 48 | 49 | # Step 6: using gradient descent with learning rate of 0.001 to minimize loss 50 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss) 51 | 52 | start = time.time() 53 | 54 | # Create a filewriter to write the model's graph to TensorBoard 55 | ############################# 56 | ########## TO DO ############ 57 | ############################# 58 | 59 | with tf.Session() as sess: 60 | # Step 7: initialize the necessary variables, in this case, w and b 61 | ############################# 62 | ########## TO DO ############ 63 | ############################# 64 | 65 | # Step 8: train the model for 100 epochs 66 | for i in range(100): 67 | total_loss = 0 68 | for x, y in data: 69 | # Execute train_op and get the value of loss. 70 | # Don't forget to feed in data for placeholders 71 | _, loss = ########## TO DO ############ 72 | total_loss += loss 73 | 74 | print('Epoch {0}: {1}'.format(i, total_loss/n_samples)) 75 | 76 | # close the writer when you're done using it 77 | ############################# 78 | ########## TO DO ############ 79 | ############################# 80 | writer.close() 81 | 82 | # Step 9: output the values of w and b 83 | w_out, b_out = None, None 84 | ############################# 85 | ########## TO DO ############ 86 | ############################# 87 | 88 | print('Took: %f seconds' %(time.time() - start)) 89 | 90 | # uncomment the following lines to see the plot 91 | # plt.plot(data[:,0], data[:,1], 'bo', label='Real data') 92 | # plt.plot(data[:,0], data[:,0] * w_out + b_out, 'r', label='Predicted data') 93 | # plt.legend() 94 | # plt.show() -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_logreg.py: -------------------------------------------------------------------------------- 1 | """ Solution for simple logistic regression model for MNIST 2 | with tf.data module 3 | MNIST dataset: yann.lecun.com/exdb/mnist/ 4 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 5 | CS20: "TensorFlow for Deep Learning Research" 6 | cs20.stanford.edu 7 | Lecture 03 8 | """ 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | import time 15 | 16 | import utils 17 | 18 | # Define paramaters for the model 19 | learning_rate = 0.01 20 | batch_size = 128 21 | n_epochs = 30 22 | n_train = 60000 23 | n_test = 10000 24 | 25 | # Step 1: Read in data 26 | mnist_folder = 'data/mnist' 27 | utils.download_mnist(mnist_folder) 28 | train, val, test = utils.read_mnist(mnist_folder, flatten=True) 29 | 30 | # Step 2: Create datasets and iterator 31 | train_data = tf.data.Dataset.from_tensor_slices(train) 32 | train_data = train_data.shuffle(10000) # if you want to shuffle your data 33 | train_data = train_data.batch(batch_size) 34 | 35 | test_data = tf.data.Dataset.from_tensor_slices(test) 36 | test_data = test_data.batch(batch_size) 37 | 38 | iterator = tf.data.Iterator.from_structure(train_data.output_types, 39 | train_data.output_shapes) 40 | img, label = iterator.get_next() 41 | 42 | train_init = iterator.make_initializer(train_data) # initializer for train_data 43 | 
test_init = iterator.make_initializer(test_data) # initializer for train_data 44 | 45 | # Step 3: create weights and bias 46 | # w is initialized to random variables with mean of 0, stddev of 0.01 47 | # b is initialized to 0 48 | # shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w) 49 | # shape of b depends on Y 50 | w = tf.get_variable(name='weights', shape=(784, 10), initializer=tf.random_normal_initializer(0, 0.01)) 51 | b = tf.get_variable(name='bias', shape=(1, 10), initializer=tf.zeros_initializer()) 52 | 53 | # Step 4: build model 54 | # the model that returns the logits. 55 | # this logits will be later passed through softmax layer 56 | logits = tf.matmul(img, w) + b 57 | 58 | # Step 5: define loss function 59 | # use cross entropy of softmax of logits as the loss function 60 | entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=label, name='entropy') 61 | loss = tf.reduce_mean(entropy, name='loss') # computes the mean over all the examples in the batch 62 | 63 | # Step 6: define training op 64 | # using gradient descent with learning rate of 0.01 to minimize loss 65 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss) 66 | 67 | # Step 7: calculate accuracy with test set 68 | preds = tf.nn.softmax(logits) 69 | correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(label, 1)) 70 | accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32)) 71 | 72 | writer = tf.summary.FileWriter('./graphs/logreg', tf.get_default_graph()) 73 | with tf.Session() as sess: 74 | 75 | start_time = time.time() 76 | sess.run(tf.global_variables_initializer()) 77 | 78 | # train the model n_epochs times 79 | for i in range(n_epochs): 80 | sess.run(train_init) # drawing samples from train_data 81 | total_loss = 0 82 | n_batches = 0 83 | try: 84 | while True: 85 | _, l = sess.run([optimizer, loss]) 86 | total_loss += l 87 | n_batches += 1 88 | except tf.errors.OutOfRangeError: 89 | pass 90 | print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches)) 91 | print('Total time: {0} seconds'.format(time.time() - start_time)) 92 | 93 | # test the model 94 | sess.run(test_init) # drawing samples from test_data 95 | total_correct_preds = 0 96 | try: 97 | while True: 98 | accuracy_batch = sess.run(accuracy) 99 | total_correct_preds += accuracy_batch 100 | except tf.errors.OutOfRangeError: 101 | pass 102 | 103 | print('Accuracy {0}'.format(total_correct_preds/n_test)) 104 | writer.close() 105 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_logreg_placeholder.py: -------------------------------------------------------------------------------- 1 | """ Solution for simple logistic regression model for MNIST 2 | with placeholder 3 | MNIST dataset: yann.lecun.com/exdb/mnist/ 4 | Created by Chip Huyen (huyenn@cs.stanford.edu) 5 | CS20: "TensorFlow for Deep Learning Research" 6 | cs20.stanford.edu 7 | Lecture 03 8 | """ 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | from tensorflow.examples.tutorials.mnist import input_data 15 | import time 16 | 17 | import utils 18 | 19 | # Define paramaters for the model 20 | learning_rate = 0.01 21 | batch_size = 128 22 | n_epochs = 30 23 | 24 | # Step 1: Read in data 25 | # using TF Learn's built in function to load MNIST data to the folder data/mnist 26 | mnist = input_data.read_data_sets('data/mnist', one_hot=True) 27 | X_batch, Y_batch = mnist.train.next_batch(batch_size) 28 | 
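# Note: the next_batch call above only pulls one sample batch (with batch_size = 128 and the
# flattened MNIST images, X_batch has shape (128, 784) and Y_batch (128, 10)) so the data can be
# inspected; it is not reused -- the training loop below fetches a fresh batch on every step.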
29 | # Step 2: create placeholders for features and labels 30 | # each image in the MNIST data is of shape 28*28 = 784 31 | # therefore, each image is represented with a 1x784 tensor 32 | # there are 10 classes for each image, corresponding to digits 0 - 9. 33 | # each lable is one hot vector. 34 | X = tf.placeholder(tf.float32, [batch_size, 784], name='image') 35 | Y = tf.placeholder(tf.int32, [batch_size, 10], name='label') 36 | 37 | # Step 3: create weights and bias 38 | # w is initialized to random variables with mean of 0, stddev of 0.01 39 | # b is initialized to 0 40 | # shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w) 41 | # shape of b depends on Y 42 | w = tf.get_variable(name='weights', shape=(784, 10), initializer=tf.random_normal_initializer()) 43 | b = tf.get_variable(name='bias', shape=(1, 10), initializer=tf.zeros_initializer()) 44 | 45 | # Step 4: build model 46 | # the model that returns the logits. 47 | # this logits will be later passed through softmax layer 48 | logits = tf.matmul(X, w) + b 49 | 50 | # Step 5: define loss function 51 | # use cross entropy of softmax of logits as the loss function 52 | entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y, name='loss') 53 | loss = tf.reduce_mean(entropy) # computes the mean over all the examples in the batch 54 | # loss = tf.reduce_mean(-tf.reduce_sum(tf.nn.softmax(logits) * tf.log(Y), reduction_indices=[1])) 55 | 56 | # Step 6: define training op 57 | # using gradient descent with learning rate of 0.01 to minimize loss 58 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss) 59 | 60 | # Step 7: calculate accuracy with test set 61 | preds = tf.nn.softmax(logits) 62 | correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y, 1)) 63 | accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32)) 64 | 65 | writer = tf.summary.FileWriter('./graphs/logreg_placeholder', tf.get_default_graph()) 66 | with tf.Session() as sess: 67 | start_time = time.time() 68 | sess.run(tf.global_variables_initializer()) 69 | n_batches = int(mnist.train.num_examples/batch_size) 70 | 71 | # train the model n_epochs times 72 | for i in range(n_epochs): 73 | total_loss = 0 74 | 75 | for j in range(n_batches): 76 | X_batch, Y_batch = mnist.train.next_batch(batch_size) 77 | _, loss_batch = sess.run([optimizer, loss], {X: X_batch, Y:Y_batch}) 78 | total_loss += loss_batch 79 | print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches)) 80 | print('Total time: {0} seconds'.format(time.time() - start_time)) 81 | 82 | # test the model 83 | n_batches = int(mnist.test.num_examples/batch_size) 84 | total_correct_preds = 0 85 | 86 | for i in range(n_batches): 87 | X_batch, Y_batch = mnist.test.next_batch(batch_size) 88 | accuracy_batch = sess.run(accuracy, {X: X_batch, Y:Y_batch}) 89 | total_correct_preds += accuracy_batch 90 | 91 | print('Accuracy {0}'.format(total_correct_preds/mnist.test.num_examples)) 92 | 93 | writer.close() 94 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_logreg_starter.py: -------------------------------------------------------------------------------- 1 | """ Starter code for simple logistic regression model for MNIST 2 | with tf.data module 3 | MNIST dataset: yann.lecun.com/exdb/mnist/ 4 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 5 | CS20: "TensorFlow for Deep Learning Research" 6 | cs20.stanford.edu 7 | Lecture 03 8 | """ 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 
| import numpy as np 13 | import tensorflow as tf 14 | import time 15 | 16 | import utils 17 | 18 | # Define paramaters for the model 19 | learning_rate = 0.01 20 | batch_size = 128 21 | n_epochs = 30 22 | n_train = 60000 23 | n_test = 10000 24 | 25 | # Step 1: Read in data 26 | mnist_folder = 'data/mnist' 27 | utils.download_mnist(mnist_folder) 28 | train, val, test = utils.read_mnist(mnist_folder, flatten=True) 29 | 30 | # Step 2: Create datasets and iterator 31 | # create training Dataset and batch it 32 | train_data = tf.data.Dataset.from_tensor_slices(train) 33 | train_data = train_data.shuffle(10000) # if you want to shuffle your data 34 | train_data = train_data.batch(batch_size) 35 | 36 | # create testing Dataset and batch it 37 | test_data = None 38 | ############################# 39 | ########## TO DO ############ 40 | ############################# 41 | 42 | 43 | # create one iterator and initialize it with different datasets 44 | iterator = tf.data.Iterator.from_structure(train_data.output_types, 45 | train_data.output_shapes) 46 | img, label = iterator.get_next() 47 | 48 | train_init = iterator.make_initializer(train_data) # initializer for train_data 49 | test_init = iterator.make_initializer(test_data) # initializer for train_data 50 | 51 | # Step 3: create weights and bias 52 | # w is initialized to random variables with mean of 0, stddev of 0.01 53 | # b is initialized to 0 54 | # shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w) 55 | # shape of b depends on Y 56 | w, b = None, None 57 | ############################# 58 | ########## TO DO ############ 59 | ############################# 60 | 61 | 62 | # Step 4: build model 63 | # the model that returns the logits. 64 | # this logits will be later passed through softmax layer 65 | logits = None 66 | ############################# 67 | ########## TO DO ############ 68 | ############################# 69 | 70 | 71 | # Step 5: define loss function 72 | # use cross entropy of softmax of logits as the loss function 73 | loss = None 74 | ############################# 75 | ########## TO DO ############ 76 | ############################# 77 | 78 | 79 | # Step 6: define optimizer 80 | # using Adamn Optimizer with pre-defined learning rate to minimize loss 81 | optimizer = None 82 | ############################# 83 | ########## TO DO ############ 84 | ############################# 85 | 86 | 87 | # Step 7: calculate accuracy with test set 88 | preds = tf.nn.softmax(logits) 89 | correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(label, 1)) 90 | accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32)) 91 | 92 | writer = tf.summary.FileWriter('./graphs/logreg', tf.get_default_graph()) 93 | with tf.Session() as sess: 94 | 95 | start_time = time.time() 96 | sess.run(tf.global_variables_initializer()) 97 | 98 | # train the model n_epochs times 99 | for i in range(n_epochs): 100 | sess.run(train_init) # drawing samples from train_data 101 | total_loss = 0 102 | n_batches = 0 103 | try: 104 | while True: 105 | _, l = sess.run([optimizer, loss]) 106 | total_loss += l 107 | n_batches += 1 108 | except tf.errors.OutOfRangeError: 109 | pass 110 | print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches)) 111 | print('Total time: {0} seconds'.format(time.time() - start_time)) 112 | 113 | # test the model 114 | sess.run(test_init) # drawing samples from test_data 115 | total_correct_preds = 0 116 | try: 117 | while True: 118 | accuracy_batch = sess.run(accuracy) 119 | total_correct_preds += accuracy_batch 120 
| except tf.errors.OutOfRangeError: 121 | pass 122 | 123 | print('Accuracy {0}'.format(total_correct_preds/n_test)) 124 | writer.close() -------------------------------------------------------------------------------- /Tensorflow/standford_example/04_linreg_eager.py: -------------------------------------------------------------------------------- 1 | """ Starter code for a simple regression example using eager execution. 2 | Created by Akshay Agrawal (akshayka@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 04 6 | """ 7 | import time 8 | 9 | import tensorflow as tf 10 | import tensorflow.contrib.eager as tfe 11 | import matplotlib.pyplot as plt 12 | 13 | import utils 14 | 15 | DATA_FILE = 'data/birth_life_2010.txt' 16 | 17 | # In order to use eager execution, `tfe.enable_eager_execution()` must be 18 | # called at the very beginning of a TensorFlow program. 19 | tfe.enable_eager_execution() 20 | 21 | # Read the data into a dataset. 22 | data, n_samples = utils.read_birth_life_data(DATA_FILE) 23 | dataset = tf.data.Dataset.from_tensor_slices((data[:,0], data[:,1])) 24 | 25 | # Create variables. 26 | w = tfe.Variable(0.0) 27 | b = tfe.Variable(0.0) 28 | 29 | # Define the linear predictor. 30 | def prediction(x): 31 | return x * w + b 32 | 33 | # Define loss functions of the form: L(y, y_predicted) 34 | def squared_loss(y, y_predicted): 35 | return (y - y_predicted) ** 2 36 | 37 | def huber_loss(y, y_predicted, m=1.0): 38 | """Huber loss.""" 39 | t = y - y_predicted 40 | # Note that enabling eager execution lets you use Python control flow and 41 | # specificy dynamic TensorFlow computations. Contrast this implementation 42 | # to the graph-construction one found in `utils`, which uses `tf.cond`. 43 | return t ** 2 if tf.abs(t) <= m else m * (2 * tf.abs(t) - m) 44 | 45 | def train(loss_fn): 46 | """Train a regression model evaluated using `loss_fn`.""" 47 | print('Training; loss function: ' + loss_fn.__name__) 48 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) 49 | 50 | # Define the function through which to differentiate. 51 | def loss_for_example(x, y): 52 | return loss_fn(y, prediction(x)) 53 | 54 | # `grad_fn(x_i, y_i)` returns (1) the value of `loss_for_example` 55 | # evaluated at `x_i`, `y_i` and (2) the gradients of any variables used in 56 | # calculating it. 57 | grad_fn = tfe.implicit_value_and_gradients(loss_for_example) 58 | 59 | start = time.time() 60 | for epoch in range(100): 61 | total_loss = 0.0 62 | for x_i, y_i in tfe.Iterator(dataset): 63 | loss, gradients = grad_fn(x_i, y_i) 64 | # Take an optimization step and update variables. 65 | optimizer.apply_gradients(gradients) 66 | total_loss += loss 67 | if epoch % 10 == 0: 68 | print('Epoch {0}: {1}'.format(epoch, total_loss / n_samples)) 69 | print('Took: %f seconds' % (time.time() - start)) 70 | print('Eager execution exhibits significant overhead per operation. ' 71 | 'As you increase your batch size, the impact of the overhead will ' 72 | 'become less noticeable. Eager execution is under active development: ' 73 | 'expect performance to increase substantially in the near future!') 74 | 75 | train(huber_loss) 76 | plt.plot(data[:,0], data[:,1], 'bo') 77 | # The `.numpy()` method of a tensor retrieves the NumPy array backing it. 78 | # In future versions of eager, you won't need to call `.numpy()` and will 79 | # instead be able to, in most cases, pass Tensors wherever NumPy arrays are 80 | # expected. 
81 | plt.plot(data[:,0], data[:,0] * w.numpy() + b.numpy(), 'r', 82 | label="huber regression") 83 | plt.legend() 84 | plt.show() 85 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/04_linreg_eager_starter.py: -------------------------------------------------------------------------------- 1 | """ Starter code for a simple regression example using eager execution. 2 | Created by Akshay Agrawal (akshayka@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 04 6 | """ 7 | import time 8 | 9 | import tensorflow as tf 10 | import tensorflow.contrib.eager as tfe 11 | import matplotlib.pyplot as plt 12 | 13 | import utils 14 | 15 | DATA_FILE = 'data/birth_life_2010.txt' 16 | 17 | # In order to use eager execution, `tfe.enable_eager_execution()` must be 18 | # called at the very beginning of a TensorFlow program. 19 | ############################# 20 | ########## TO DO ############ 21 | ############################# 22 | 23 | # Read the data into a dataset. 24 | data, n_samples = utils.read_birth_life_data(DATA_FILE) 25 | dataset = tf.data.Dataset.from_tensor_slices((data[:,0], data[:,1])) 26 | 27 | # Create weight and bias variables, initialized to 0.0. 28 | ############################# 29 | ########## TO DO ############ 30 | ############################# 31 | w = None 32 | b = None 33 | 34 | # Define the linear predictor. 35 | def prediction(x): 36 | ############################# 37 | ########## TO DO ############ 38 | ############################# 39 | pass 40 | 41 | # Define loss functions of the form: L(y, y_predicted) 42 | def squared_loss(y, y_predicted): 43 | ############################# 44 | ########## TO DO ############ 45 | ############################# 46 | pass 47 | 48 | def huber_loss(y, y_predicted): 49 | """Huber loss with `m` set to `1.0`.""" 50 | ############################# 51 | ########## TO DO ############ 52 | ############################# 53 | pass 54 | 55 | def train(loss_fn): 56 | """Train a regression model evaluated using `loss_fn`.""" 57 | print('Training; loss function: ' + loss_fn.__name__) 58 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) 59 | 60 | # Define the function through which to differentiate. 61 | ############################# 62 | ########## TO DO ############ 63 | ############################# 64 | def loss_for_example(x, y): 65 | pass 66 | 67 | # Obtain a gradients function using `tfe.implicit_value_and_gradients`. 68 | ############################# 69 | ########## TO DO ############ 70 | ############################# 71 | grad_fn = None 72 | 73 | start = time.time() 74 | for epoch in range(100): 75 | total_loss = 0.0 76 | for x_i, y_i in tfe.Iterator(dataset): 77 | # Compute the loss and gradient, and take an optimization step. 78 | ############################# 79 | ########## TO DO ############ 80 | ############################# 81 | optimizer.apply_gradients(gradients) 82 | total_loss += loss 83 | if epoch % 10 == 0: 84 | print('Epoch {0}: {1}'.format(epoch, total_loss / n_samples)) 85 | print('Took: %f seconds' % (time.time() - start)) 86 | print('Eager execution exhibits significant overhead per operation. ' 87 | 'As you increase your batch size, the impact of the overhead will ' 88 | 'become less noticeable. 
Eager execution is under active development: ' 89 | 'expect performance to increase substantially in the near future!') 90 | 91 | train(huber_loss) 92 | plt.plot(data[:,0], data[:,1], 'bo') 93 | # The `.numpy()` method of a tensor retrieves the NumPy array backing it. 94 | # In future versions of eager, you won't need to call `.numpy()` and will 95 | # instead be able to, in most cases, pass Tensors wherever NumPy arrays are 96 | # expected. 97 | plt.plot(data[:,0], data[:,0] * w.numpy() + b.numpy(), 'r', 98 | label="huber regression") 99 | plt.legend() 100 | plt.show() 101 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/04_word2vec.py: -------------------------------------------------------------------------------- 1 | """ starter code for word2vec skip-gram model with NCE loss 2 | CS 20: "TensorFlow for Deep Learning Research" 3 | cs20.stanford.edu 4 | Chip Huyen (chiphuyen@cs.stanford.edu) 5 | Lecture 04 6 | """ 7 | 8 | import os 9 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 10 | 11 | import numpy as np 12 | from tensorflow.contrib.tensorboard.plugins import projector 13 | import tensorflow as tf 14 | 15 | import utils 16 | import word2vec_utils 17 | 18 | # Model hyperparameters 19 | VOCAB_SIZE = 50000 20 | BATCH_SIZE = 128 21 | EMBED_SIZE = 128 # dimension of the word embedding vectors 22 | SKIP_WINDOW = 1 # the context window 23 | NUM_SAMPLED = 64 # number of negative examples to sample 24 | LEARNING_RATE = 1.0 25 | NUM_TRAIN_STEPS = 100000 26 | VISUAL_FLD = 'visualization' 27 | SKIP_STEP = 5000 28 | 29 | # Parameters for downloading data 30 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip' 31 | EXPECTED_BYTES = 31344016 32 | NUM_VISUALIZE = 3000 # number of tokens to visualize 33 | 34 | 35 | def word2vec(dataset): 36 | """ Build the graph for word2vec model and train it """ 37 | # Step 1: get input, output from the dataset 38 | with tf.name_scope('data'): 39 | iterator = dataset.make_initializable_iterator() 40 | center_words, target_words = iterator.get_next() 41 | 42 | """ Step 2 + 3: define weights and embedding lookup. 
43 | In word2vec, it's actually the weights that we care about 44 | """ 45 | with tf.name_scope('embed'): 46 | embed_matrix = tf.get_variable('embed_matrix', 47 | shape=[VOCAB_SIZE, EMBED_SIZE], 48 | initializer=tf.random_uniform_initializer()) 49 | embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embedding') 50 | 51 | # Step 4: construct variables for NCE loss and define loss function 52 | with tf.name_scope('loss'): 53 | nce_weight = tf.get_variable('nce_weight', shape=[VOCAB_SIZE, EMBED_SIZE], 54 | initializer=tf.truncated_normal_initializer(stddev=1.0 / (EMBED_SIZE ** 0.5))) 55 | nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE])) 56 | 57 | # define loss function to be NCE loss function 58 | loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight, 59 | biases=nce_bias, 60 | labels=target_words, 61 | inputs=embed, 62 | num_sampled=NUM_SAMPLED, 63 | num_classes=VOCAB_SIZE), name='loss') 64 | 65 | # Step 5: define optimizer 66 | with tf.name_scope('optimizer'): 67 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss) 68 | 69 | utils.safe_mkdir('checkpoints') 70 | 71 | with tf.Session() as sess: 72 | sess.run(iterator.initializer) 73 | sess.run(tf.global_variables_initializer()) 74 | 75 | total_loss = 0.0 # we use this to calculate late average loss in the last SKIP_STEP steps 76 | writer = tf.summary.FileWriter('graphs/word2vec_simple', sess.graph) 77 | 78 | for index in range(NUM_TRAIN_STEPS): 79 | try: 80 | loss_batch, _ = sess.run([loss, optimizer]) 81 | total_loss += loss_batch 82 | if (index + 1) % SKIP_STEP == 0: 83 | print('Average loss at step {}: {:5.1f}'.format(index, total_loss / SKIP_STEP)) 84 | total_loss = 0.0 85 | except tf.errors.OutOfRangeError: 86 | sess.run(iterator.initializer) 87 | writer.close() 88 | 89 | def gen(): 90 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES, VOCAB_SIZE, 91 | BATCH_SIZE, SKIP_WINDOW, VISUAL_FLD) 92 | 93 | def main(): 94 | dataset = tf.data.Dataset.from_generator(gen, 95 | (tf.int32, tf.int32), 96 | (tf.TensorShape([BATCH_SIZE]), tf.TensorShape([BATCH_SIZE, 1]))) 97 | word2vec(dataset) 98 | 99 | if __name__ == '__main__': 100 | main() 101 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/04_word2vec_eager.py: -------------------------------------------------------------------------------- 1 | """ starter code for word2vec skip-gram model with NCE loss 2 | Eager execution 3 | CS 20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Chip Huyen (chiphuyen@cs.stanford.edu) & Akshay Agrawal (akshayka@cs.stanford.edu) 6 | Lecture 04 7 | """ 8 | 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | import tensorflow.contrib.eager as tfe 15 | 16 | import utils 17 | import word2vec_utils 18 | 19 | tfe.enable_eager_execution() 20 | 21 | # Model hyperparameters 22 | VOCAB_SIZE = 50000 23 | BATCH_SIZE = 128 24 | EMBED_SIZE = 128 # dimension of the word embedding vectors 25 | SKIP_WINDOW = 1 # the context window 26 | NUM_SAMPLED = 64 # number of negative examples to sample 27 | LEARNING_RATE = 1.0 28 | NUM_TRAIN_STEPS = 100000 29 | VISUAL_FLD = 'visualization' 30 | SKIP_STEP = 5000 31 | 32 | # Parameters for downloading data 33 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip' 34 | EXPECTED_BYTES = 31344016 35 | 36 | class Word2Vec(object): 37 | def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED): 38 | 
self.vocab_size = vocab_size 39 | self.num_sampled = num_sampled 40 | self.embed_matrix = tfe.Variable(tf.random_uniform( 41 | [vocab_size, embed_size])) 42 | self.nce_weight = tfe.Variable(tf.truncated_normal( 43 | [vocab_size, embed_size], 44 | stddev=1.0 / (embed_size ** 0.5))) 45 | self.nce_bias = tfe.Variable(tf.zeros([vocab_size])) 46 | 47 | def compute_loss(self, center_words, target_words): 48 | """Computes the forward pass of word2vec with the NCE loss.""" 49 | embed = tf.nn.embedding_lookup(self.embed_matrix, center_words) 50 | loss = tf.reduce_mean(tf.nn.nce_loss(weights=self.nce_weight, 51 | biases=self.nce_bias, 52 | labels=target_words, 53 | inputs=embed, 54 | num_sampled=self.num_sampled, 55 | num_classes=self.vocab_size)) 56 | return loss 57 | 58 | 59 | def gen(): 60 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES, 61 | VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW, 62 | VISUAL_FLD) 63 | 64 | def main(): 65 | dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32), 66 | (tf.TensorShape([BATCH_SIZE]), 67 | tf.TensorShape([BATCH_SIZE, 1]))) 68 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE) 69 | model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE) 70 | grad_fn = tfe.implicit_value_and_gradients(model.compute_loss) 71 | total_loss = 0.0 # for average loss in the last SKIP_STEP steps 72 | num_train_steps = 0 73 | while num_train_steps < NUM_TRAIN_STEPS: 74 | for center_words, target_words in tfe.Iterator(dataset): 75 | if num_train_steps >= NUM_TRAIN_STEPS: 76 | break 77 | loss_batch, grads = grad_fn(center_words, target_words) 78 | total_loss += loss_batch 79 | optimizer.apply_gradients(grads) 80 | if (num_train_steps + 1) % SKIP_STEP == 0: 81 | print('Average loss at step {}: {:5.1f}'.format( 82 | num_train_steps, total_loss / SKIP_STEP)) 83 | total_loss = 0.0 84 | num_train_steps += 1 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/04_word2vec_eager_starter.py: -------------------------------------------------------------------------------- 1 | """ starter code for word2vec skip-gram model with NCE loss 2 | Eager execution 3 | CS 20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Chip Huyen (chiphuyen@cs.stanford.edu) & Akshay Agrawal (akshayka@cs.stanford.edu) 6 | Lecture 04 7 | """ 8 | 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | import tensorflow.contrib.eager as tfe 15 | 16 | import utils 17 | import word2vec_utils 18 | 19 | # Enable eager execution! 
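# Hint: the completed 04_word2vec_eager.py above does this by calling
# tfe.enable_eager_execution() immediately after the imports, before any ops are created.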
20 | ############################# 21 | ########## TO DO ############ 22 | ############################# 23 | 24 | # Model hyperparameters 25 | VOCAB_SIZE = 50000 26 | BATCH_SIZE = 128 27 | EMBED_SIZE = 128 # dimension of the word embedding vectors 28 | SKIP_WINDOW = 1 # the context window 29 | NUM_SAMPLED = 64 # number of negative examples to sample 30 | LEARNING_RATE = 1.0 31 | NUM_TRAIN_STEPS = 100000 32 | VISUAL_FLD = 'visualization' 33 | SKIP_STEP = 5000 34 | 35 | # Parameters for downloading data 36 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip' 37 | EXPECTED_BYTES = 31344016 38 | 39 | class Word2Vec(object): 40 | def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED): 41 | self.vocab_size = vocab_size 42 | self.num_sampled = num_sampled 43 | # Create the variables: an embedding matrix, nce_weight, and nce_bias 44 | ############################# 45 | ########## TO DO ############ 46 | ############################# 47 | self.embed_matrix = None 48 | self.nce_weight = None 49 | self.nce_bias = None 50 | 51 | def compute_loss(self, center_words, target_words): 52 | """Computes the forward pass of word2vec with the NCE loss.""" 53 | # Look up the embeddings for the center words 54 | ############################# 55 | ########## TO DO ############ 56 | ############################# 57 | embed = None 58 | 59 | # Compute the loss, using tf.reduce_mean and tf.nn.nce_loss 60 | ############################# 61 | ########## TO DO ############ 62 | ############################# 63 | loss = None 64 | return loss 65 | 66 | 67 | def gen(): 68 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES, 69 | VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW, 70 | VISUAL_FLD) 71 | 72 | def main(): 73 | dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32), 74 | (tf.TensorShape([BATCH_SIZE]), 75 | tf.TensorShape([BATCH_SIZE, 1]))) 76 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE) 77 | # Create the model 78 | ############################# 79 | ########## TO DO ############ 80 | ############################# 81 | model = None 82 | 83 | # Create the gradients function, using `tfe.implicit_value_and_gradients` 84 | ############################# 85 | ########## TO DO ############ 86 | ############################# 87 | grad_fn = None 88 | 89 | total_loss = 0.0 # for average loss in the last SKIP_STEP steps 90 | num_train_steps = 0 91 | while num_train_steps < NUM_TRAIN_STEPS: 92 | for center_words, target_words in tfe.Iterator(dataset): 93 | if num_train_steps >= NUM_TRAIN_STEPS: 94 | break 95 | 96 | # Compute the loss and gradients, and take an optimization step. 
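# Hint: the completed 04_word2vec_eager.py above performs this step as
#   loss_batch, grads = grad_fn(center_words, target_words)
#   total_loss += loss_batch
#   optimizer.apply_gradients(grads)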
97 | ############################# 98 | ########## TO DO ############ 99 | ############################# 100 | 101 | if (num_train_steps + 1) % SKIP_STEP == 0: 102 | print('Average loss at step {}: {:5.1f}'.format( 103 | num_train_steps, total_loss / SKIP_STEP)) 104 | total_loss = 0.0 105 | num_train_steps += 1 106 | 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/05_randomization.py: -------------------------------------------------------------------------------- 1 | """ Examples to demonstrate ops level randomization 2 | CS 20: "TensorFlow for Deep Learning Research" 3 | cs20.stanford.edu 4 | Chip Huyen (chiphuyen@cs.stanford.edu) 5 | Lecture 05 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | 10 | import tensorflow as tf 11 | 12 | # Example 1: session keeps track of the random state 13 | c = tf.random_uniform([], -10, 10, seed=2) 14 | 15 | with tf.Session() as sess: 16 | print(sess.run(c)) # >> 3.574932 17 | print(sess.run(c)) # >> -5.9731865 18 | 19 | # Example 2: each new session will start the random state all over again. 20 | c = tf.random_uniform([], -10, 10, seed=2) 21 | 22 | with tf.Session() as sess: 23 | print(sess.run(c)) # >> 3.574932 24 | 25 | with tf.Session() as sess: 26 | print(sess.run(c)) # >> 3.574932 27 | 28 | # Example 3: with operation level random seed, each op keeps its own seed. 29 | c = tf.random_uniform([], -10, 10, seed=2) 30 | d = tf.random_uniform([], -10, 10, seed=2) 31 | 32 | with tf.Session() as sess: 33 | print(sess.run(c)) # >> 3.574932 34 | print(sess.run(d)) # >> 3.574932 35 | 36 | # Example 4: graph level random seed 37 | tf.set_random_seed(2) 38 | c = tf.random_uniform([], -10, 10) 39 | d = tf.random_uniform([], -10, 10) 40 | 41 | with tf.Session() as sess: 42 | print(sess.run(c)) # >> 9.123926 43 | print(sess.run(d)) # >> -4.5340395 44 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/05_variable_sharing.py: -------------------------------------------------------------------------------- 1 | """ Examples to demonstrate variable sharing 2 | CS 20: 'TensorFlow for Deep Learning Research' 3 | cs20.stanford.edu 4 | Chip Huyen (chiphuyen@cs.stanford.edu) 5 | Lecture 05 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | 10 | import tensorflow as tf 11 | 12 | x1 = tf.truncated_normal([200, 100], name='x1') 13 | x2 = tf.truncated_normal([200, 100], name='x2') 14 | 15 | def two_hidden_layers(x): 16 | assert x.shape.as_list() == [200, 100] 17 | w1 = tf.Variable(tf.random_normal([100, 50]), name='h1_weights') 18 | b1 = tf.Variable(tf.zeros([50]), name='h1_biases') 19 | h1 = tf.matmul(x, w1) + b1 20 | assert h1.shape.as_list() == [200, 50] 21 | w2 = tf.Variable(tf.random_normal([50, 10]), name='h2_weights') 22 | b2 = tf.Variable(tf.zeros([10]), name='2_biases') 23 | logits = tf.matmul(h1, w2) + b2 24 | return logits 25 | 26 | def two_hidden_layers_2(x): 27 | assert x.shape.as_list() == [200, 100] 28 | w1 = tf.get_variable('h1_weights', [100, 50], initializer=tf.random_normal_initializer()) 29 | b1 = tf.get_variable('h1_biases', [50], initializer=tf.constant_initializer(0.0)) 30 | h1 = tf.matmul(x, w1) + b1 31 | assert h1.shape.as_list() == [200, 50] 32 | w2 = tf.get_variable('h2_weights', [50, 10], initializer=tf.random_normal_initializer()) 33 | b2 = tf.get_variable('h2_biases', [10], initializer=tf.constant_initializer(0.0)) 34 | logits = 
tf.matmul(h1, w2) + b2 35 | return logits 36 | 37 | # logits1 = two_hidden_layers(x1) 38 | # logits2 = two_hidden_layers(x2) 39 | 40 | # logits1 = two_hidden_layers_2(x1) 41 | # logits2 = two_hidden_layers_2(x2) 42 | 43 | # with tf.variable_scope('two_layers') as scope: 44 | # logits1 = two_hidden_layers_2(x1) 45 | # scope.reuse_variables() 46 | # logits2 = two_hidden_layers_2(x2) 47 | 48 | # with tf.variable_scope('two_layers') as scope: 49 | # logits1 = two_hidden_layers_2(x1) 50 | # scope.reuse_variables() 51 | # logits2 = two_hidden_layers_2(x2) 52 | 53 | def fully_connected(x, output_dim, scope): 54 | with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope: 55 | w = tf.get_variable('weights', [x.shape[1], output_dim], initializer=tf.random_normal_initializer()) 56 | b = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0)) 57 | return tf.matmul(x, w) + b 58 | 59 | def two_hidden_layers(x): 60 | h1 = fully_connected(x, 50, 'h1') 61 | h2 = fully_connected(h1, 10, 'h2') 62 | 63 | with tf.variable_scope('two_layers') as scope: 64 | logits1 = two_hidden_layers(x1) 65 | # scope.reuse_variables() 66 | logits2 = two_hidden_layers(x2) 67 | 68 | writer = tf.summary.FileWriter('./graphs/cool_variables', tf.get_default_graph()) 69 | writer.close() -------------------------------------------------------------------------------- /Tensorflow/standford_example/07_run_kernels.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple examples of convolution to do some basic filters 3 | Also demonstrates the use of TensorFlow data readers. 4 | 5 | We will use some popular filters for our image. 6 | It seems to be working with grayscale images, but not with rgb images. 7 | It's probably because I didn't choose the right kernels for rgb images. 8 | 9 | kernels for rgb images have dimensions 3 x 3 x 3 x 3 10 | kernels for grayscale images have dimensions 3 x 3 x 1 x 1 11 | 12 | CS 20: "TensorFlow for Deep Learning Research" 13 | cs20.stanford.edu 14 | Chip Huyen (chiphuyen@cs.stanford.edu) 15 | Lecture 07 16 | """ 17 | import os 18 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 19 | 20 | import sys 21 | sys.path.append('..') 22 | 23 | from matplotlib import gridspec as gridspec 24 | from matplotlib import pyplot as plt 25 | import tensorflow as tf 26 | 27 | import kernels 28 | 29 | def read_one_image(filename): 30 | ''' This method is to show how to read image from a file into a tensor. 31 | The output is a tensor object. 
32 | ''' 33 | image_string = tf.read_file(filename) 34 | image_decoded = tf.image.decode_image(image_string) 35 | image = tf.cast(image_decoded, tf.float32) / 256.0 36 | return image 37 | 38 | def convolve(image, kernels, rgb=True, strides=[1, 3, 3, 1], padding='SAME'): 39 | images = [image[0]] 40 | for i, kernel in enumerate(kernels): 41 | filtered_image = tf.nn.conv2d(image, 42 | kernel, 43 | strides=strides, 44 | padding=padding)[0] 45 | if i == 2: 46 | filtered_image = tf.minimum(tf.nn.relu(filtered_image), 255) 47 | images.append(filtered_image) 48 | return images 49 | 50 | def show_images(images, rgb=True): 51 | gs = gridspec.GridSpec(1, len(images)) 52 | for i, image in enumerate(images): 53 | plt.subplot(gs[0, i]) 54 | if rgb: 55 | plt.imshow(image) 56 | else: 57 | image = image.reshape(image.shape[0], image.shape[1]) 58 | plt.imshow(image, cmap='gray') 59 | plt.axis('off') 60 | plt.show() 61 | 62 | def main(): 63 | rgb = False 64 | if rgb: 65 | kernels_list = [kernels.BLUR_FILTER_RGB, 66 | kernels.SHARPEN_FILTER_RGB, 67 | kernels.EDGE_FILTER_RGB, 68 | kernels.TOP_SOBEL_RGB, 69 | kernels.EMBOSS_FILTER_RGB] 70 | else: 71 | kernels_list = [kernels.BLUR_FILTER, 72 | kernels.SHARPEN_FILTER, 73 | kernels.EDGE_FILTER, 74 | kernels.TOP_SOBEL, 75 | kernels.EMBOSS_FILTER] 76 | 77 | kernels_list = kernels_list[1:] 78 | image = read_one_image('data/friday.jpg') 79 | if not rgb: 80 | image = tf.image.rgb_to_grayscale(image) 81 | image = tf.expand_dims(image, 0) # make it into a batch of 1 element 82 | images = convolve(image, kernels_list, rgb) 83 | with tf.Session() as sess: 84 | images = sess.run(images) # convert images from tensors to float values 85 | show_images(images, rgb) 86 | 87 | if __name__ == '__main__': 88 | main() -------------------------------------------------------------------------------- /Tensorflow/standford_example/kernels.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | a = np.zeros([3, 3, 3, 3]) 5 | a[1, 1, :, :] = 0.25 6 | a[0, 1, :, :] = 0.125 7 | a[1, 0, :, :] = 0.125 8 | a[2, 1, :, :] = 0.125 9 | a[1, 2, :, :] = 0.125 10 | a[0, 0, :, :] = 0.0625 11 | a[0, 2, :, :] = 0.0625 12 | a[2, 0, :, :] = 0.0625 13 | a[2, 2, :, :] = 0.0625 14 | 15 | BLUR_FILTER_RGB = tf.constant(a, dtype=tf.float32) 16 | 17 | a = np.zeros([3, 3, 1, 1]) 18 | # a[1, 1, :, :] = 0.25 19 | # a[0, 1, :, :] = 0.125 20 | # a[1, 0, :, :] = 0.125 21 | # a[2, 1, :, :] = 0.125 22 | # a[1, 2, :, :] = 0.125 23 | # a[0, 0, :, :] = 0.0625 24 | # a[0, 2, :, :] = 0.0625 25 | # a[2, 0, :, :] = 0.0625 26 | # a[2, 2, :, :] = 0.0625 27 | a[1, 1, :, :] = 1.0 28 | a[0, 1, :, :] = 1.0 29 | a[1, 0, :, :] = 1.0 30 | a[2, 1, :, :] = 1.0 31 | a[1, 2, :, :] = 1.0 32 | a[0, 0, :, :] = 1.0 33 | a[0, 2, :, :] = 1.0 34 | a[2, 0, :, :] = 1.0 35 | a[2, 2, :, :] = 1.0 36 | BLUR_FILTER = tf.constant(a, dtype=tf.float32) 37 | 38 | a = np.zeros([3, 3, 3, 3]) 39 | a[1, 1, :, :] = 5 40 | a[0, 1, :, :] = -1 41 | a[1, 0, :, :] = -1 42 | a[2, 1, :, :] = -1 43 | a[1, 2, :, :] = -1 44 | 45 | SHARPEN_FILTER_RGB = tf.constant(a, dtype=tf.float32) 46 | 47 | a = np.zeros([3, 3, 1, 1]) 48 | a[1, 1, :, :] = 5 49 | a[0, 1, :, :] = -1 50 | a[1, 0, :, :] = -1 51 | a[2, 1, :, :] = -1 52 | a[1, 2, :, :] = -1 53 | 54 | SHARPEN_FILTER = tf.constant(a, dtype=tf.float32) 55 | 56 | # a = np.zeros([3, 3, 3, 3]) 57 | # a[:, :, :, :] = -1 58 | # a[1, 1, :, :] = 8 59 | 60 | # EDGE_FILTER_RGB = tf.constant(a, dtype=tf.float32) 61 | 62 | EDGE_FILTER_RGB = 
tf.constant([ 63 | [[[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]], 64 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]], 65 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]], 66 | [[[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]], 67 | [[ 8., 0., 0.], [ 0., 8., 0.], [ 0., 0., 8.]], 68 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]], 69 | [[[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]], 70 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]], 71 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]] 72 | ]) 73 | 74 | a = np.zeros([3, 3, 1, 1]) 75 | # a[:, :, :, :] = -1 76 | # a[1, 1, :, :] = 8 77 | a[0, 1, :, :] = -1 78 | a[1, 0, :, :] = -1 79 | a[1, 2, :, :] = -1 80 | a[2, 1, :, :] = -1 81 | a[1, 1, :, :] = 4 82 | 83 | EDGE_FILTER = tf.constant(a, dtype=tf.float32) 84 | 85 | a = np.zeros([3, 3, 3, 3]) 86 | a[0, :, :, :] = 1 87 | a[0, 1, :, :] = 2 # originally 2 88 | a[2, :, :, :] = -1 89 | a[2, 1, :, :] = -2 90 | 91 | TOP_SOBEL_RGB = tf.constant(a, dtype=tf.float32) 92 | 93 | a = np.zeros([3, 3, 1, 1]) 94 | a[0, :, :, :] = 1 95 | a[0, 1, :, :] = 2 # originally 2 96 | a[2, :, :, :] = -1 97 | a[2, 1, :, :] = -2 98 | 99 | TOP_SOBEL = tf.constant(a, dtype=tf.float32) 100 | 101 | a = np.zeros([3, 3, 3, 3]) 102 | a[0, 0, :, :] = -2 103 | a[0, 1, :, :] = -1 104 | a[1, 0, :, :] = -1 105 | a[1, 1, :, :] = 1 106 | a[1, 2, :, :] = 1 107 | a[2, 1, :, :] = 1 108 | a[2, 2, :, :] = 2 109 | 110 | EMBOSS_FILTER_RGB = tf.constant(a, dtype=tf.float32) 111 | 112 | a = np.zeros([3, 3, 1, 1]) 113 | a[0, 0, :, :] = -2 114 | a[0, 1, :, :] = -1 115 | a[1, 0, :, :] = -1 116 | a[1, 1, :, :] = 1 117 | a[1, 2, :, :] = 1 118 | a[2, 1, :, :] = 1 119 | a[2, 2, :, :] = 2 120 | EMBOSS_FILTER = tf.constant(a, dtype=tf.float32) -------------------------------------------------------------------------------- /Tensorflow/standford_example/word2vec_utils.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import random 3 | import os 4 | import sys 5 | sys.path.append('..') 6 | import zipfile 7 | 8 | import numpy as np 9 | from six.moves import urllib 10 | import tensorflow as tf 11 | 12 | import utils 13 | 14 | def read_data(file_path): 15 | """ Read data into a list of tokens 16 | There should be 17,005,207 tokens 17 | """ 18 | with zipfile.ZipFile(file_path) as f: 19 | words = tf.compat.as_str(f.read(f.namelist()[0])).split() 20 | return words 21 | 22 | def build_vocab(words, vocab_size, visual_fld): 23 | """ Build vocabulary of VOCAB_SIZE most frequent words and write it to 24 | visualization/vocab.tsv 25 | """ 26 | utils.safe_mkdir(visual_fld) 27 | file = open(os.path.join(visual_fld, 'vocab.tsv'), 'w') 28 | 29 | dictionary = dict() 30 | count = [('UNK', -1)] 31 | index = 0 32 | count.extend(Counter(words).most_common(vocab_size - 1)) 33 | 34 | for word, _ in count: 35 | dictionary[word] = index 36 | index += 1 37 | file.write(word + '\n') 38 | 39 | index_dictionary = dict(zip(dictionary.values(), dictionary.keys())) 40 | file.close() 41 | return dictionary, index_dictionary 42 | 43 | def convert_words_to_index(words, dictionary): 44 | """ Replace each word in the dataset with its index in the dictionary """ 45 | return [dictionary[word] if word in dictionary else 0 for word in words] 46 | 47 | def generate_sample(index_words, context_window_size): 48 | """ Form training pairs according to the skip-gram model.
""" 49 | for index, center in enumerate(index_words): 50 | context = random.randint(1, context_window_size) 51 | # get a random target before the center word 52 | for target in index_words[max(0, index - context): index]: 53 | yield center, target 54 | # get a random target after the center wrod 55 | for target in index_words[index + 1: index + context + 1]: 56 | yield center, target 57 | 58 | def most_common_words(visual_fld, num_visualize): 59 | """ create a list of num_visualize most frequent words to visualize on TensorBoard. 60 | saved to visualization/vocab_[num_visualize].tsv 61 | """ 62 | words = open(os.path.join(visual_fld, 'vocab.tsv'), 'r').readlines()[:num_visualize] 63 | words = [word for word in words] 64 | file = open(os.path.join(visual_fld, 'vocab_' + str(num_visualize) + '.tsv'), 'w') 65 | for word in words: 66 | file.write(word) 67 | file.close() 68 | 69 | def batch_gen(download_url, expected_byte, vocab_size, batch_size, 70 | skip_window, visual_fld): 71 | local_dest = 'data/text8.zip' 72 | utils.download_one_file(download_url, local_dest, expected_byte) 73 | words = read_data(local_dest) 74 | dictionary, _ = build_vocab(words, vocab_size, visual_fld) 75 | index_words = convert_words_to_index(words, dictionary) 76 | del words # to save memory 77 | single_gen = generate_sample(index_words, skip_window) 78 | 79 | while True: 80 | center_batch = np.zeros(batch_size, dtype=np.int32) 81 | target_batch = np.zeros([batch_size, 1]) 82 | for index in range(batch_size): 83 | center_batch[index], target_batch[index] = next(single_gen) 84 | yield center_batch, target_batch -------------------------------------------------------------------------------- /Text_Classification/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Text_Classification/.DS_Store -------------------------------------------------------------------------------- /Text_Classification/.ipynb_checkpoints/cnn_textclassification_keras-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 6.3 \n", 8 | "\n", 9 | "https://github.com/jarfo/kchar\n", 10 | "https://github.com/carpedm20/lstm-char-cnn-tensorflow\n", 11 | "https://github.com/fchollet/keras/blob/master/examples/imdb_cnn.py" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [ 21 | { 22 | "name": "stderr", 23 | "output_type": "stream", 24 | "text": [ 25 | "Using TensorFlow backend.\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "from keras.preprocessing import sequence\n", 31 | "from keras.models import Sequential\n", 32 | "from keras.layers import Dense, Dropout, Activation\n", 33 | "from keras.layers import Embedding\n", 34 | "from keras.layers import Conv1D, GlobalMaxPooling1D\n", 35 | "from keras.datasets import imdb" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "# set parameters:\n", 47 | "max_features = 5000\n", 48 | "maxlen = 400\n", 49 | "batch_size = 32\n", 50 | "embedding_dims = 50\n", 51 | "filters = 250\n", 52 | "kernel_size = 3\n", 53 | "hidden_dims = 250\n", 54 | "epochs = 2" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | 
"metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "Loading data...\n", 69 | "25000 train sequences\n", 70 | "25000 test sequences\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "print('Loading data...')\n", 76 | "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)\n", 77 | "print(len(x_train), 'train sequences')\n", 78 | "print(len(x_test), 'test sequences')" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 4, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "Pad sequences (samples x time)\n", 93 | "x_train shape: (25000, 400)\n", 94 | "x_test shape: (25000, 400)\n", 95 | "Build model...\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "print('Pad sequences (samples x time)')\n", 101 | "x_train = sequence.pad_sequences(x_train, maxlen=maxlen)\n", 102 | "x_test = sequence.pad_sequences(x_test, maxlen=maxlen)\n", 103 | "print('x_train shape:', x_train.shape)\n", 104 | "print('x_test shape:', x_test.shape)\n", 105 | "print('Build model...')\n" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 8, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "model = Sequential()\n", 117 | "\n", 118 | "# we start off with an efficient embedding layer which maps\n", 119 | "# our vocab indices into embedding_dims dimensions\n", 120 | "model.add(Embedding(max_features,\n", 121 | " embedding_dims,\n", 122 | " input_length=maxlen))\n", 123 | "model.add(Dropout(0.2))" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": true 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "# we add a Convolution1D, which will learn filters\n", 135 | "# word group filters of size filter_length:\n", 136 | "model.add(Conv1D(filters,\n", 137 | " kernel_size,\n", 138 | " padding='valid',\n", 139 | " activation='relu',\n", 140 | " strides=1))\n", 141 | "# we use max pooling:\n", 142 | "model.add(GlobalMaxPooling1D())\n", 143 | "\n", 144 | "# We add a vanilla hidden layer:\n", 145 | "model.add(Dense(hidden_dims))\n", 146 | "model.add(Dropout(0.2))\n", 147 | "model.add(Activation('relu'))\n", 148 | "\n", 149 | "# We project onto a single unit output layer, and squash it with a sigmoid:\n", 150 | "model.add(Dense(1))\n", 151 | "model.add(Activation('sigmoid'))\n", 152 | "\n", 153 | "model.compile(loss='binary_crossentropy',\n", 154 | " optimizer='adam',\n", 155 | " metrics=['accuracy'])\n", 156 | "model.fit(x_train, y_train,\n", 157 | " batch_size=batch_size,\n", 158 | " epochs=epochs,\n", 159 | " validation_data=(x_test, y_test))" 160 | ] 161 | } 162 | ], 163 | "metadata": { 164 | "anaconda-cloud": {}, 165 | "kernelspec": { 166 | "display_name": "Python [conda root]", 167 | "language": "python", 168 | "name": "conda-root-py" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | "version": 3 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": "python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython3", 180 | "version": "3.5.2" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 1 185 | } 186 | -------------------------------------------------------------------------------- /Text_Classification/.ipynb_checkpoints/seq2seq_keras-checkpoint.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Text_Classification/To_Do/cnn_keras_text_class_kor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 6 15:08:54 2017 5 | 6 | #Data 7 | Korean movie review data: https://github.com/e9t/nsmc 8 | 9 | @author: ryan 10 | 11 | In Progress 12 | """ -------------------------------------------------------------------------------- /Text_Classification/To_Do/hierachical_attention_keras_text_class_eng.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 6 15:08:54 2017 5 | 6 | #Data 7 | 8 | @author: ryan 9 | 10 | In Progress 11 | """ 12 | -------------------------------------------------------------------------------- /Text_Classification/To_Do/lstm_keras_text_class_eng.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 6 15:08:54 2017 5 | 6 | #Data 7 | 8 | @author: ryan 9 | 10 | In Progress 11 | """ 12 | -------------------------------------------------------------------------------- /Text_Classification/To_Do/rnn_attention_keras_text_class_eng .py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 6 15:08:54 2017 5 | 6 | #Data 7 | 8 | @author: ryan 9 | 10 | In Progress 11 | """ 12 | -------------------------------------------------------------------------------- /Text_Classification/cnn_keras_text_class_imdb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 6 22:55:55 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import pickle 12 | from collections import defaultdict 13 | import re 14 | 15 | from bs4 import BeautifulSoup 16 | 17 | import sys 18 | import os 19 | 20 | from keras.preprocessing import sequence 21 | from keras.models import Sequential 22 | from keras.layers import Dense, Dropout, Activation 23 | from keras.layers import Embedding 24 | from keras.layers import Conv1D, GlobalMaxPooling1D 25 | from keras.datasets import imdb 26 | 27 | 28 | # set parameters: 29 | max_features = 5000 30 | maxlen = 400 31 | batch_size = 32 32 | embedding_dims = 50 33 | filters = 250 34 | kernel_size = 3 35 | hidden_dims = 250 36 | epochs = 2 37 | 38 | print('Loading data...') 39 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 40 | print(len(x_train), 'train sequences') 41 | print(len(x_test), 'test sequences') 42 | 43 | print('Pad sequences (samples x time)') 44 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 45 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 46 | print('x_train shape:', x_train.shape) 47 | print('x_test shape:', x_test.shape) 48 | 49 | print('Build model...') 50 | model = Sequential() 51 | 52 | # we start off with an efficient embedding layer which maps 53 | # our vocab indices into embedding_dims dimensions 54 | model.add(Embedding(max_features, 55 | embedding_dims, 56 |
input_length=maxlen)) 57 | model.add(Dropout(0.2)) 58 | 59 | # we add a Convolution1D, which will learn filters 60 | # word group filters of size filter_length: 61 | model.add(Conv1D(filters, 62 | kernel_size, 63 | padding='valid', 64 | activation='relu', 65 | strides=1)) 66 | # we use max pooling: 67 | model.add(GlobalMaxPooling1D()) 68 | 69 | # We add a vanilla hidden layer: 70 | model.add(Dense(hidden_dims)) 71 | model.add(Dropout(0.2)) 72 | model.add(Activation('relu')) 73 | 74 | # We project onto a single unit output layer, and squash it with a sigmoid: 75 | model.add(Dense(1)) 76 | model.add(Activation('sigmoid')) 77 | 78 | model.compile(loss='binary_crossentropy', 79 | optimizer='adam', 80 | metrics=['accuracy']) 81 | history = model.fit(x_train, y_train, 82 | batch_size=batch_size, 83 | epochs=epochs, 84 | validation_data=(x_test, y_test)) 85 | 86 | # list all data in history 87 | print(history.history.keys()) 88 | 89 | # summarize history for accuracy 90 | import matplotlib.pyplot as plt 91 | 92 | plt.plot(history.history['acc']) 93 | plt.plot(history.history['val_acc']) 94 | plt.title('model accuracy') 95 | plt.ylabel('accuracy') 96 | plt.xlabel('epoch') 97 | plt.legend(['train', 'test'], loc='upper left') 98 | plt.show() 99 | # summarize history for loss 100 | plt.plot(history.history['loss']) 101 | plt.plot(history.history['val_loss']) 102 | plt.title('model loss') 103 | plt.ylabel('loss') 104 | plt.xlabel('epoch') 105 | plt.legend(['train', 'test'], loc='upper left') 106 | plt.show() -------------------------------------------------------------------------------- /Text_Classification/cnn_keras_text_class_imdb2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Jun 3 18:51:43 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | '''This example demonstrates the use of Convolution1D for text classification. 10 | Gets to 0.89 test accuracy after 2 epochs. 11 | 90s/epoch on Intel i5 2.4Ghz CPU. 12 | 10s/epoch on Tesla K40 GPU. 
13 | ''' 14 | 15 | from __future__ import print_function 16 | 17 | from keras.preprocessing import sequence 18 | from keras.models import Sequential 19 | from keras.layers import Dense, Dropout, Activation 20 | from keras.layers import Embedding 21 | from keras.layers import Conv1D, GlobalMaxPooling1D 22 | from keras.datasets import imdb 23 | 24 | # set parameters: 25 | max_features = 5000 26 | maxlen = 400 27 | batch_size = 32 28 | embedding_dims = 50 29 | filters = 250 30 | kernel_size = 3 31 | hidden_dims = 250 32 | epochs = 2 33 | 34 | print('Loading data...') 35 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 36 | print(len(x_train), 'train sequences') 37 | print(len(x_test), 'test sequences') 38 | 39 | print('Pad sequences (samples x time)') 40 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 41 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 42 | print('x_train shape:', x_train.shape) 43 | print('x_test shape:', x_test.shape) 44 | 45 | print('Build model...') 46 | model = Sequential() 47 | 48 | # we start off with an efficient embedding layer which maps 49 | # our vocab indices into embedding_dims dimensions 50 | model.add(Embedding(max_features, 51 | embedding_dims, 52 | input_length=maxlen)) 53 | model.add(Dropout(0.2)) 54 | 55 | # we add a Convolution1D, which will learn filters 56 | # word group filters of size filter_length: 57 | model.add(Conv1D(filters, 58 | kernel_size, 59 | padding='valid', 60 | activation='relu', 61 | strides=1)) 62 | # we use max pooling: 63 | model.add(GlobalMaxPooling1D()) 64 | 65 | # We add a vanilla hidden layer: 66 | model.add(Dense(hidden_dims)) 67 | model.add(Dropout(0.2)) 68 | model.add(Activation('relu')) 69 | 70 | # We project onto a single unit output layer, and squash it with a sigmoid: 71 | model.add(Dense(1)) 72 | model.add(Activation('sigmoid')) 73 | 74 | model.compile(loss='binary_crossentropy', 75 | optimizer='adam', 76 | metrics=['accuracy']) 77 | model.fit(x_train, y_train, 78 | batch_size=batch_size, 79 | epochs=epochs, 80 | validation_data=(x_test, y_test)) -------------------------------------------------------------------------------- /Text_Classification/nets/__pycache__/text_cnn.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Text_Classification/nets/__pycache__/text_cnn.cpython-35.pyc -------------------------------------------------------------------------------- /Text_Classification/nets/text_cnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from collections import OrderedDict 4 | 5 | def textcnn(input_placeholder, target_placeholder, vocab_size, embedding_dim, filter_sizes, num_filters, is_training=True, keep_prob=0.8, scope='TextCNN'): 6 | 7 | # Get 'sequence_length' and 'num_classes' 8 | sequence_length = input_placeholder.get_shape()[1] 9 | num_classes = target_placeholder.get_shape()[1] 10 | 11 | # Declare 'end_points' which is an ordered dictionary 12 | end_points = OrderedDict() 13 | 14 | # tf.random_uniform_initializer의 형태를 간소화 15 | random_uniform = lambda minval, maxval: tf.random_uniform_initializer(minval=minval, maxval=maxval) 16 | 17 | # tf.truncated_normal_initializer의 형태를 간소화 18 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(mean=0.0, stddev=stddev) 19 | 20 | # tf.contrib.layers.xavier_initializer의 형태를 간소화 21 | 
xavier = tf.contrib.layers.xavier_initializer() 22 | 23 | # tf.contrib.layers.xavier_initializer_conv2d의 형태를 간소화 24 | xavier_conv = tf.contrib.layers.xavier_initializer_conv2d() 25 | 26 | # tf.constant_initializer의 형태를 간소화 27 | constant = lambda value: tf.constant_initializer(value=value) 28 | 29 | with tf.variable_scope(scope): 30 | 31 | end_point = 'Embedding' 32 | with tf.variable_scope(end_point): 33 | w_embedding = tf.get_variable(name='w_embedding', shape=[vocab_size, embedding_dim], 34 | initializer=random_uniform(-1.0, 1.0)) 35 | embedded_chars = tf.nn.embedding_lookup(params=w_embedding, ids=input_placeholder, name='embedded_chars') 36 | embedded_chars_expanded = tf.expand_dims(input=embedded_chars, axis=-1, name='embedded_chars_expanded') 37 | end_points[end_point] = w_embedding 38 | 39 | pooled_output = [] 40 | for i, filter_size in enumerate(filter_sizes): 41 | end_point = 'Conv-maxpool-%d' % filter_size 42 | with tf.variable_scope(end_point): 43 | filter_shape = [filter_size, embedding_dim, 1, num_filters] 44 | bias_shape = [num_filters] 45 | w_conv = tf.get_variable(name='w_conv', shape=filter_shape, initializer=trunc_normal(0.01)) 46 | b_conv = tf.get_variable(name='b_conv', shape=bias_shape, initializer=constant(0.0)) 47 | conv = tf.nn.conv2d(input=embedded_chars_expanded, filter=w_conv, strides=[1, 1, 1, 1], padding='VALID', name='conv') 48 | activated = tf.nn.relu(features=tf.nn.bias_add(conv, b_conv), name='relu') 49 | pooled = tf.nn.max_pool(value=activated, ksize=[1, sequence_length - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding='VALID', name='maxpool') 50 | pooled_output.append(pooled) 51 | end_points[end_point] = pooled 52 | 53 | end_point = 'Flatten' 54 | with tf.variable_scope(end_point): 55 | num_filters_total = num_filters * len(filter_sizes) 56 | h_pool = tf.concat(values=pooled_output, axis=3, name='concat') 57 | h_pool_flat = tf.reshape(tensor=h_pool, shape=[-1, num_filters_total], name='flatten') 58 | end_points[end_point] = h_pool_flat 59 | 60 | end_point = 'Fully-connected' 61 | with tf.variable_scope(end_point): 62 | dropout = tf.contrib.slim.dropout(h_pool_flat, keep_prob=keep_prob, is_training=is_training, scope='dropout') 63 | w_fc = tf.get_variable(name='w_fc', shape=[num_filters_total, num_classes], initializer=xavier) 64 | b_fc = tf.get_variable(name='b_fc', shape=[num_classes], initializer=constant(0.0)) 65 | logits = tf.nn.xw_plus_b(x=dropout, weights=w_fc, biases=b_fc, name='logits') 66 | end_points[end_point] = logits 67 | 68 | return logits, end_points -------------------------------------------------------------------------------- /Text_Classification/seq2seq_keras.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [] 11 | } 12 | ], 13 | "metadata": { 14 | "anaconda-cloud": {}, 15 | "kernelspec": { 16 | "display_name": "Python [conda root]", 17 | "language": "python", 18 | "name": "conda-root-py" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.5.2" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 1 35 | } 36 | -------------------------------------------------------------------------------- 
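# Usage sketch for Text_Classification/nets/text_cnn.py above: the module only defines
# textcnn() and never shows how it is wired into a graph. This is a minimal, hypothetical
# driver (TF 1.x placeholder style, assumed to run from the Text_Classification/ directory);
# the sequence length, class count, vocabulary size, and hyperparameter values below are
# illustrative assumptions, not values taken from the repo.
import tensorflow as tf
from nets.text_cnn import textcnn

SEQ_LEN, NUM_CLASSES, VOCAB_SIZE = 400, 2, 5000   # assumed sizes for illustration

inputs = tf.placeholder(tf.int32, [None, SEQ_LEN], name='inputs')           # word-id sequences
targets = tf.placeholder(tf.float32, [None, NUM_CLASSES], name='targets')   # one-hot labels

logits, end_points = textcnn(inputs, targets,
                             vocab_size=VOCAB_SIZE, embedding_dim=128,
                             filter_sizes=[3, 4, 5], num_filters=100,
                             is_training=True, keep_prob=0.8)

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=logits))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)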
/VAE/vae_sample.py: -------------------------------------------------------------------------------- 1 | """ 2 | Arxiv Insights: https://www.youtube.com/watch?v=9zKuYvjFFS8&t=609s 3 | https://github.com/hwalsuklee/tensorflow-mnist-CVAE 4 | """ 5 | 6 | import tensorflow as tf 7 | 8 | # Gaussian MLP as conditional encoder 9 | def gaussian_MLP_conditional_encoder(x, y, n_hidden, n_output, keep_prob): 10 | with tf.variable_scope("gaussian_MLP_encoder"): 11 | # concatenate condition and image 12 | dim_y = int(y.get_shape()[1]) 13 | input = tf.concat(axis=1, values=[x, y]) 14 | 15 | # initializers 16 | w_init = tf.contrib.layers.variance_scaling_initializer() 17 | b_init = tf.constant_initializer(0.) 18 | 19 | # 1st hidden layer 20 | w0 = tf.get_variable('w0', [input.get_shape()[1], n_hidden+dim_y], initializer=w_init) 21 | b0 = tf.get_variable('b0', [n_hidden+dim_y], initializer=b_init) 22 | h0 = tf.matmul(input, w0) + b0 23 | h0 = tf.nn.elu(h0) 24 | h0 = tf.nn.dropout(h0, keep_prob) 25 | 26 | # 2nd hidden layer 27 | w1 = tf.get_variable('w1', [h0.get_shape()[1], n_hidden], initializer=w_init) 28 | b1 = tf.get_variable('b1', [n_hidden], initializer=b_init) 29 | h1 = tf.matmul(h0, w1) + b1 30 | h1 = tf.nn.tanh(h1) 31 | h1 = tf.nn.dropout(h1, keep_prob) 32 | 33 | # output layer 34 | # borrowed from https: // github.com / altosaar / vae / blob / master / vae.py 35 | wo = tf.get_variable('wo', [h1.get_shape()[1], n_output * 2], initializer=w_init) 36 | bo = tf.get_variable('bo', [n_output * 2], initializer=b_init) 37 | 38 | gaussian_params = tf.matmul(h1, wo) + bo 39 | 40 | # The mean parameter is unconstrained 41 | mean = gaussian_params[:, :n_output] 42 | # The standard deviation must be positive. Parametrize with a softplus and 43 | # add a small epsilon for numerical stability 44 | stddev = 1e-6 + tf.nn.softplus(gaussian_params[:, n_output:]) 45 | 46 | return mean, stddev 47 | 48 | # Bernoulli MLP as conditional decoder 49 | def bernoulli_MLP_conditional_decoder(z, y, n_hidden, n_output, keep_prob, reuse=False): 50 | 51 | with tf.variable_scope("bernoulli_MLP_decoder", reuse=reuse): 52 | # concatenate condition and latent vectors 53 | input = tf.concat(axis=1, values=[z, y]) 54 | 55 | # initializers 56 | w_init = tf.contrib.layers.variance_scaling_initializer() 57 | b_init = tf.constant_initializer(0.) 
58 | 59 | # 1st hidden layer 60 | w0 = tf.get_variable('w0', [input.get_shape()[1], n_hidden], initializer=w_init) 61 | b0 = tf.get_variable('b0', [n_hidden], initializer=b_init) 62 | h0 = tf.matmul(input, w0) + b0 63 | h0 = tf.nn.tanh(h0) 64 | h0 = tf.nn.dropout(h0, keep_prob) 65 | 66 | # 2nd hidden layer 67 | w1 = tf.get_variable('w1', [h0.get_shape()[1], n_hidden], initializer=w_init) 68 | b1 = tf.get_variable('b1', [n_hidden], initializer=b_init) 69 | h1 = tf.matmul(h0, w1) + b1 70 | h1 = tf.nn.elu(h1) 71 | h1 = tf.nn.dropout(h1, keep_prob) 72 | 73 | # output layer-mean 74 | wo = tf.get_variable('wo', [h1.get_shape()[1], n_output], initializer=w_init) 75 | bo = tf.get_variable('bo', [n_output], initializer=b_init) 76 | y = tf.sigmoid(tf.matmul(h1, wo) + bo) 77 | 78 | return y 79 | 80 | # Gateway 81 | def autoencoder(x_hat, x, y, dim_img, dim_z, n_hidden, keep_prob): 82 | 83 | # encoding 84 | mu, sigma = gaussian_MLP_conditional_encoder(x_hat, y, n_hidden, dim_z, keep_prob) 85 | 86 | # sampling by re-parameterization technique 87 | z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32) #Latent vector 88 | 89 | # decoding 90 | x_ = bernoulli_MLP_conditional_decoder(z, y, n_hidden, dim_img, keep_prob) 91 | x_ = tf.clip_by_value(x_, 1e-8, 1 - 1e-8) 92 | 93 | # ELBO 94 | marginal_likelihood = tf.reduce_sum(x * tf.log(x_) + (1 - x) * tf.log(1 - x_), 1) 95 | KL_divergence = 0.5 * tf.reduce_sum(tf.square(mu) + tf.square(sigma) - tf.log(1e-8 + tf.square(sigma)) - 1, 1) 96 | 97 | marginal_likelihood = tf.reduce_mean(marginal_likelihood) 98 | KL_divergence = tf.reduce_mean(KL_divergence) 99 | 100 | ELBO = marginal_likelihood - KL_divergence 101 | 102 | # minimize loss instead of maximizing ELBO 103 | loss = -ELBO 104 | 105 | return x_, z, loss, -marginal_likelihood, KL_divergence 106 | 107 | # Conditional Decoder (Generator) 108 | def decoder(z, y, dim_img, n_hidden): 109 | 110 | x_ = bernoulli_MLP_conditional_decoder(z, y, n_hidden, dim_img, 1.0, reuse=True) 111 | 112 | return x_ -------------------------------------------------------------------------------- /dataset/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/dataset/.DS_Store -------------------------------------------------------------------------------- /pytorch_basic/.ipynb_checkpoints/10.pytorch_rnn-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /pytorch_basic/.ipynb_checkpoints/sec 6. 
Linear regression wih Python-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "ename": "NameError", 12 | "evalue": "name 'np' is not defined", 13 | "output_type": "error", 14 | "traceback": [ 15 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 16 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 17 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 18 | "\u001b[0;31mNameError\u001b[0m: name 'np' is not defined" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "np.random.seed(1)\n", 24 | "\n", 25 | "x= np.random.rand(n)\n", 26 | "y = x ** np.random.rand(n)\n", 27 | "\n", 28 | "colors = np.random.rand()\n", 29 | "plt.plot(np.unique(x), np.poly1d(np.ployfit(x,y,1))(np.unique(x)))\n", 30 | "\n", 31 | "plt.scatter(x,y, colors, alpha=0.5)\n", 32 | "plt.show()" 33 | ] 34 | } 35 | ], 36 | "metadata": { 37 | "anaconda-cloud": {}, 38 | "kernelspec": { 39 | "display_name": "Python [conda root]", 40 | "language": "python", 41 | "name": "conda-root-py" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.5.2" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 1 58 | } 59 | -------------------------------------------------------------------------------- /pytorch_basic/10.pytorch_rnn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [] 11 | } 12 | ], 13 | "metadata": { 14 | "anaconda-cloud": {}, 15 | "kernelspec": { 16 | "display_name": "Python [conda root]", 17 | "language": "python", 18 | "name": "conda-root-py" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.5.2" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 1 35 | } 36 | -------------------------------------------------------------------------------- 
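# Corrected, self-contained sketch of the plotting cell in the "sec 6. Linear regression
# wih Python" notebooks (checkpoint above, full notebook further below): the original cells
# fail with NameErrors (np is never imported in the checkpoint, n is never defined in the
# saved notebook), omit the matplotlib import, and misspell np.polyfit as np.ployfit.
# n = 50 here is an arbitrary illustrative choice, not a value from the repo.
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(1)
n = 50
x = np.random.rand(n)
y = x ** np.random.rand(n)

# fit a degree-1 polynomial and draw the fitted line over the scatter
coeffs = np.polyfit(x, y, 1)
xs = np.unique(x)
plt.plot(xs, np.poly1d(coeffs)(xs))
plt.scatter(x, y, alpha=0.5)
plt.show()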
/pytorch_basic/Start_Pytorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from torch.autograd import Variable\n", 10 | "import torch" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 4, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "torch.add?" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [] 28 | } 29 | ], 30 | "metadata": { 31 | "kernelspec": { 32 | "display_name": "Python 3", 33 | "language": "python", 34 | "name": "python3" 35 | }, 36 | "language_info": { 37 | "codemirror_mode": { 38 | "name": "ipython", 39 | "version": 3 40 | }, 41 | "file_extension": ".py", 42 | "mimetype": "text/x-python", 43 | "name": "python", 44 | "nbconvert_exporter": "python", 45 | "pygments_lexer": "ipython3", 46 | "version": "3.6.3" 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 2 51 | } 52 | -------------------------------------------------------------------------------- /pytorch_basic/cnn_cifar10_pytorch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Jun 10 15:48:47 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | #-----------CNN------------------# 10 | import torch 11 | import torchvision 12 | import torchvision.transforms as transforms 13 | 14 | transform = transforms.Compose( 15 | [transforms.ToTensor(), 16 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 17 | 18 | trainset = torchvision.datasets.CIFAR10(root='./data', train=True, 19 | download=True, transform=transform) 20 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, 21 | shuffle=True, num_workers=2) 22 | 23 | testset = torchvision.datasets.CIFAR10(root='./data', train=False, 24 | download=True, transform=transform) 25 | testloader = torch.utils.data.DataLoader(testset, batch_size=4, 26 | shuffle=False, num_workers=2) 27 | 28 | classes = ('plane', 'car', 'bird', 'cat', 29 | 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') 30 | 31 | import matplotlib.pyplot as plt 32 | import numpy as np 33 | 34 | # functions to show an image 35 | 36 | def imshow(img): 37 | img = img / 2 + 0.5 # unnormalize 38 | npimg = img.numpy() 39 | plt.imshow(np.transpose(npimg, (1, 2, 0))) 40 | 41 | 42 | # get some random training images 43 | dataiter = iter(trainloader) 44 | images, labels = dataiter.next() 45 | 46 | # show images 47 | imshow(torchvision.utils.make_grid(images)) 48 | # print labels 49 | print(' '.join('%5s' % classes[labels[j]] for j in range(4))) 50 | 51 | #1. Loading and normalizing cifar10 52 | 53 | #2. 
Define a Convolution Neural Network 54 | from torch.autograd import Variable 55 | import torch.nn as nn 56 | import torch.nn.functional as F 57 | 58 | class Net(nn.Module): 59 | def __init__(self): 60 | super(Net, self).__init__() 61 | self.conv1 = nn.Conv2d(3, 6, 5) 62 | self.pool = nn.MaxPool2d(2,2) 63 | self.conv2 = nn.Conv2d(6, 16, 5) 64 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 65 | self.fc2 = nn.Linear(120, 84) 66 | self.fc3 = nn.Linear(84, 10) 67 | 68 | def forward(self, x): 69 | x = self.pool(F.relu(self.conv1(x))) 70 | x = self.pool(F.relu(self.conv2(x))) 71 | x = x.view(-1, 16 * 5 * 5) 72 | x = F.relu(self.fc1(x)) 73 | x = F.relu(self.fc2(x)) 74 | x = self.fc3(x) 75 | return x 76 | 77 | net = Net() 78 | 79 | #3. Define a Loss function and optimizer 80 | 81 | #Cross-Entropy and SGD with momentum 82 | import torch.optim as optim 83 | 84 | criterion = nn.CrossEntropyLoss() 85 | optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) 86 | 87 | #4. train the network 88 | net.cuda() 89 | 90 | for epoch in range(1000): # loop over the dataset multiple times 91 | 92 | running_loss = 0.0 93 | for i, data in enumerate(trainloader, 0): 94 | # get the inputs 95 | inputs, labels = data 96 | 97 | # wrap them in Variable 98 | #inputs, labels = Variable(inputs), Variable(labels) #CPU Ver 99 | inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda()) #GPU Ver 100 | 101 | # zero the parameter gradients 102 | optimizer.zero_grad() 103 | 104 | # forward + backward + optimize 105 | outputs = net(inputs) 106 | loss = criterion(outputs, labels) 107 | loss.backward() 108 | optimizer.step() 109 | 110 | # print statistics 111 | running_loss += loss.data[0] 112 | if i % 2000 == 1999: # print every 2000 mini-batches 113 | print('[%d, %5d] loss: %.3f' % 114 | (epoch + 1, i + 1, running_loss / 2000)) 115 | running_loss = 0.0 116 | 117 | print('Finished Training') 118 | 119 | #5. Test the network on the test data 120 | dataiter = iter(testloader) 121 | images, labels = dataiter.next() 122 | #print image 123 | imshow(torchvision.utils.make_grid(images)) 124 | print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4))) 125 | 126 | outputs = net(Variable(images)) 127 | 128 | _, predicted = torch.max(outputs.data, 1) 129 | print('Predicted: ', ' '.join('%5s' % classes[predicted[j][0]] 130 | for j in range(4))) 131 | 132 | #Performance Test 133 | correct = 0 134 | total = 0 135 | for data in testloader: 136 | images, labels = data 137 | outputs = net(Variable(images)) 138 | _, predicted = torch.max(outputs.data, 1) 139 | total += labels.size(0) 140 | correct += (predicted == labels).sum() 141 | 142 | print('Accuracy of the network on the 10000 test images: %d %%' % ( 143 | 100 * correct / total)) 144 | 145 | #정확히 맞추는 것과 못 맞추는 것 구분 146 | class_correct = list(0. for i in range(10)) 147 | class_total = list(0. 
for i in range(10)) 148 | for data in testloader: 149 | images, labels = data 150 | outputs = net(Variable(images)) 151 | _, predicted = torch.max(outputs.data, 1) 152 | c = (predicted == labels).squeeze() 153 | for i in range(4): 154 | label = labels[i] 155 | class_correct[label] += c[i] 156 | class_total[label] += 1 157 | 158 | for i in range(10): 159 | print('Accuracy of %5s : %2d %%' % ( 160 | classes[i], 100 * class_correct[i] / class_total[i])) 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /pytorch_basic/pytorch_basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jun 6 16:24:52 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | """Pytorch Intro""" 10 | 11 | import torch 12 | 13 | import os 14 | os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1" 15 | 16 | #GPU 17 | dtype = torch.cuda.FloatTensor 18 | 19 | ##Tensors 20 | x = torch.Tensor(5,3).type(dtype) 21 | x = torch.rand(5,3).type(dtype) 22 | x.size() 23 | 24 | ##Operations 25 | y = torch.rand(5,3).type(dtype) 26 | print(x+y) 27 | 28 | #print(torch.add(x,y)) 29 | result = torch.Tensor(5,3).type(dtype) 30 | torch.add(x, y, out=result) 31 | print(result) 32 | 33 | #Indexing 34 | print(x[:, 1]) 35 | 36 | ##Numpy Bridge 37 | 38 | #Convert torch Tensor to numpy Array 39 | a = torch.ones(5) 40 | print(a) 41 | 42 | b = a.numpy() 43 | print(b) 44 | 45 | #Convert numpy array to torch 46 | import numpy as np 47 | a = np.ones(5) 48 | b = torch.from_numpy(a) 49 | np.add(a, 1, out=a) 50 | print(a) 51 | print(b) 52 | 53 | #Cuda Tensors 54 | if torch.cuda.is_available(): 55 | x = x.cuda() 56 | y = y.cuda() 57 | x + y 58 | 59 | 60 | """ Autograd: Automatic differentiation """ 61 | 62 | ##Variable 63 | # If Variable is not a scala, you need to specify arg. 
for backward() 64 | 65 | import torch 66 | from torch.autograd import Variable 67 | 68 | x = Variable(torch.ones(2,2), requires_grad=True).type(dtype) 69 | y = x + 2 70 | print(y) 71 | 72 | z = y * y * 3 73 | out = z.mean() 74 | print(z, out) 75 | 76 | 77 | ##Gradients 78 | out.backward() 79 | 80 | print(x.grad) 81 | 82 | import time 83 | from datetime import timedelta 84 | 85 | start_time = time.monotonic() 86 | x = torch.randn(3) 87 | x = Variable(x, requires_grad=True) 88 | y = x*2 89 | while y.data.norm() < 1000000: 90 | y = y * 2 91 | end_time = time.monotonic() 92 | 93 | print(timedelta(seconds=end_time - start_time)) 94 | 95 | gradients = torch.FloatTensor([0.1, 1.0, 0.0001]) 96 | y.backward(gradients) 97 | 98 | print(x.grad) 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /pytorch_basic/pytorch_nlp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jun 6 17:37:50 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | import torch 10 | import torch.autograd as autograd 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | 15 | torch.manual_seed(1) 16 | 17 | #Creating Tensors 18 | V_data = [1,2,3] 19 | V = torch.Tensor(V_data) 20 | print(V) 21 | 22 | #Create matrix 23 | M_data = [[1,2,3], [4,5,6]] 24 | M = torch.Tensor(M_data) 25 | print(M) 26 | 27 | # Create 3D tensor of size 2*2*2 28 | T_data = [[[1,2],[3,4]], 29 | [[5,6],[7,8]]] 30 | T = torch.Tensor(T_data) 31 | print(T) 32 | 33 | # Index into V and get a scalar 34 | print(V[0]) 35 | 36 | # Index into M and get a vector 37 | print(M[0]) 38 | 39 | # Index into T and get a matrix 40 | print(T[0]) 41 | 42 | x = torch.randn((3, 4, 5)) 43 | print(x) 44 | 45 | ##Operations with Tensors 46 | x = torch.Tensor([1., 2., 3.]) 47 | y = torch.Tensor([4., 5., 6.]) 48 | z = x + y 49 | print(z) 50 | 51 | ##Concat 52 | # By default, it concatenates along the first axis (concatenates rows) 53 | x_1 = torch.randn(2, 5) 54 | y_1 = torch.randn(3, 5) 55 | z_1 = torch.cat([x_1, y_1]) 56 | print(z_1) 57 | 58 | # Concatenate columns: 59 | x_2 = torch.randn(2, 3) 60 | y_2 = torch.randn(2, 5) 61 | # second arg specifies which axis to concat along 62 | z_2 = torch.cat([x_2, y_2], 1) 63 | print(z_2) 64 | 65 | # If your tensors are not compatible, torch will complain. Uncomment to see the error 66 | # torch.cat([x_1, x_2]) 67 | 68 | 69 | ##Reshaping Tensors 70 | x = torch.randn(2,3,4) 71 | print(x) 72 | print(x.view(2,12)) #2rows with 12 col. 73 | print(x.view(2,-1)) #Same, If one of the dim. is -1, its size can be inferred 74 | 75 | #Comp. Graphs and Auto Diff: How your data is combeind 76 | 77 | # Variables wrap tensor objects 78 | x = autograd.Variable(torch.Tensor([1., 2., 3]), requires_grad=True) 79 | # You can access the data with the .data attribute 80 | print(x.data) 81 | 82 | # You can also do all the same operations you did with tensors with Variables. 83 | y = autograd.Variable(torch.Tensor([4., 5., 6]), requires_grad=True) 84 | z = x + y 85 | print(z.data) 86 | 87 | # BUT z knows something extra. 
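# (Added note: z was created by an operation on Variables, so autograd records how it was
#  produced, while leaf Variables like x and y have no creator. In the very old PyTorch
#  releases this script targets, that attribute was named .creator rather than .grad_fn,
#  which is presumably why the grad_fn prints below are marked as not working.)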
88 | #print(z.grad_fn) does not work 89 | 90 | s = z.sum() 91 | print(s) 92 | #print(s.grad_fn) does not work 93 | 94 | s.backward() 95 | print(x.grad) 96 | 97 | ##Sumamry 98 | 99 | x = torch.randn((2,2)) 100 | y = torch.randn((2,2)) 101 | 102 | z= x + y 103 | 104 | var_x = autograd.Variable(x) 105 | var_y = autograd.Variable(y) 106 | 107 | var_z = var_x + var_y 108 | print(var_z.grad_fn) 109 | 110 | var_z_data = var_z.data # Get the wrapped Tensor object out of var_z... 111 | new_var_z = autograd.Variable(var_z_data) 112 | 113 | print(new_var_z.grad_fn) 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /pytorch_basic/pytorch_nlp2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jun 6 17:37:50 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | import torch 10 | import torch.autograd as autograd 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | 15 | torch.manual_seed(1) 16 | 17 | lin = nn.Linear(5,3) 18 | data = autograd.Variable(torch.randn(2,5)) 19 | #차원수 변환 20 | print(lin(data)) 21 | 22 | #Non linearity 23 | print(data) 24 | print(F.relu(data)) 25 | 26 | # Softmax is also in torch.functional 27 | data = autograd.Variable(torch.randn(5)) 28 | print(data) 29 | print(F.softmax(data)) 30 | print(F.softmax(data).sum()) # Sums to 1 because it is a distribution! 31 | print(F.log_softmax(data)) # theres also log_softmax 32 | 33 | #BOW 모델 연습 34 | 35 | data = [("me gusta comer en la cafeteria".split(), "SPANISH"), 36 | ("Give it to me".split(), "ENGLISH"), 37 | ("No creo que sea una buena idea".split(), "SPANISH"), 38 | ("No it is not a good idea to get lost at sea".split(), "ENGLISH")] 39 | 40 | test_data = [("Yo creo que si".split(), "SPANISH"), 41 | ("it is lost on me".split(), "ENGLISH")] 42 | 43 | # word_to_ix maps each word in the vocab to a unique integer, which will be its 44 | # index into the Bag of words vector 45 | word_to_ix = {} 46 | for sent, _ in data + test_data: 47 | for word in sent: 48 | if word not in word_to_ix: 49 | word_to_ix[word] = len(word_to_ix) 50 | print(word_to_ix) 51 | 52 | VOCAB_SIZE = len(word_to_ix) 53 | NUM_LABELS = 2 54 | 55 | class BoWClassifier(nn.Module): #inheriting from nn.Module! 56 | def __init__(self, num_labels, vocab_size): 57 | # calls the init function of nn.Module. Dont get confused by syntax, 58 | # just always do it in an nn.Module 59 | super(BoWClassifier, self).__init__() 60 | """ 61 | 상속하게 되면 명확히 상속된 클래스 이름을 한정자로 부모 클래스의 속성과 62 | 메소드를 접근 할 수 있지만 super()를 이용하여 부모 클래스를 접근 가능 63 | Super는 하나의 클래스이다. 64 | Super를 지정하고 접근하면 클래스의 속성과 메소드를 접근해서 처리 가능 65 | 주로 오버라이딩을 작성할 때 super를 이용하여 상위 속성이나 메소드를 참조 66 | """ 67 | 68 | # Define the parameters that you will need. In this case, we need A and b, 69 | # the parameters of the affine mapping. 70 | # Torch defines nn.Linear(), which provides the affine map. 71 | # Make sure you understand why the input dimension is vocab_size 72 | # and the output is num_labels! 73 | self.linear = nn.Linear(vocab_size, num_labels) 74 | 75 | # NOTE! The non-linearity log softmax does not have parameters! 
So we don't need 76 | # to worry about that here 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /pytorch_basic/pytorch_nlp3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Jun 10 11:48:58 2017 5 | 6 | @author: ryan 7 | http://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html 8 | 9 | """ 10 | 11 | import torch 12 | import torch.autograd as autograd 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | import torch.optim as optim 16 | 17 | torch.manual_seed(1) 18 | 19 | word_to_ix = {"안녕": 0, "반가워": 1} 20 | embeds = nn.Embedding(2, 5) # 2 words in vocab, 5 dimensional embeddings 21 | lookup_tensor = torch.LongTensor([word_to_ix["안녕"]]) 22 | hello_embed = embeds(autograd.Variable(lookup_tensor)) 23 | print(hello_embed) 24 | 25 | ## 26 | CONTEXT_SIZE = 2 27 | EMBEDDING_DIM = 10 28 | 29 | # We will use Shakespeare Sonnet 2 30 | test_sentence = """미국 로스앤젤레스에 사는 배우 척 매카시는 사람들과 산책을 해주고 돈을 번다. 지난해 그가 시작한 '친구 대여(Rent-a-Friend)'는 새로운 형태의 비즈니스다. 매카시는 일감이 많지 않은 무명 배우였지만 이 부업은 조수들을 고용해야 할 만큼 번창하고 있다. 다른 도시와 외국에서도 '출장 산책' 주문이 쇄도한다. 31 | 32 | 매카시는 집 근처 공원과 거리를 고객과 함께 걸으면서 이야기를 나누는 대가로 1마일(1.6㎞)에 7달러를 받는다. 사회적 관계를 구매 가능한 상품으로 포장한 셈이다. 이름 붙이자면 '고독 비즈니스'다. 그는 영국 일간지 가디언과의 인터뷰에서 "혼자 산책하기 두렵거나 친구 없는 사람으로 비칠까봐 걱정하는 사람이 많았다"며 "자기 이야기를 누가 들어준다는 데 기뻐하며 다시 나를 찾는다"고 했다. 33 | 34 | 20~30대에서는 미혼과 만혼(晩婚), 40대 이후로는 이혼과 고령화 등으로 1인 가구가 빠르게 늘어가는 한국 사회에서 고독은 강 건너 불구경이 아니다. 우리는 페이스북·트위터·인스타그램 같은 소셜미디어로 긴밀하게 연결돼 있지만 관계의 응집력은 어느 때보다 느슨하다. '혼밥' '혼술' '혼영(나 홀로 영화)' '혼행(나 홀로 여행)' 같은 소비 패턴이 방증한다. 외로움을 감추기보다 즐기려는 경향도 나타난다. Why?는 예스24에 의뢰해 지난 1~5일 설문조사를 했다. 5864명(여성 4398명)이 응답했다. 고독을 바라보는 한국인의 태도가 드러났다. 35 | """.split() 36 | # we should tokenize the input, but we will ignore that for now 37 | # build a list of tuples. Each tuple is ([ word_i-2, word_i-1 ], target word) 38 | 39 | trigrams = [([test_sentence[i], test_sentence[i + 1]], test_sentence[i + 2]) 40 | for i in range(len(test_sentence) -2)] 41 | 42 | #중복 단어 제외 및 일반 단어 넣어 주기 43 | vocab = set(test_sentence) 44 | word_to_ix = {word: i for i , word in enumerate(vocab)} 45 | 46 | #https://wikidocs.net/28 47 | 48 | class NGramLanguageModeler(nn.Module): 49 | def __init__(self, vocab_size, embedding_dim, context_size): 50 | super(NGramLanguageModeler, self).__init__() 51 | self.embeddings = nn.Embedding(vocab_size, embedding_dim) 52 | self.linear1 = nn.Linear(context_size * embedding_dim, 128) 53 | self.linear2 = nn.Linear(128, vocab_size) 54 | 55 | def forward(self, inputs): 56 | embeds = self.embeddings(inputs).view((1,-1)) 57 | out = F.relu(self.linear1(embeds)) 58 | out = self.linear2(out) 59 | log_probs = F.log_softmax(out) 60 | return log_probs 61 | 62 | losses = [] 63 | loss_function = nn.NLLLoss() 64 | model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE) 65 | optimizer = optim.SGD(model.parameters(), lr = 0.001) 66 | 67 | for epoch in range(100): 68 | total_loss = torch.Tensor([0]) 69 | for context, target in trigrams: 70 | 71 | #Step1: 입력전처리 (integer indices(색인) 와 변수로 변환) 72 | context_idxs = [word_to_ix[w] for w in context] 73 | context_var = autograd.Variable(torch.LongTensor(context_idxs)) 74 | 75 | #Step2: torch는 gradients를 accumlates한다. 
새로운 instances를 넘기기 전에, 76 | #모든 그레디언트를 오래된 instnaces로 부터 zero out 해야함 77 | model.zero_grad() 78 | 79 | #Step3: 전진 학습을 하며, 다음 단어에 대한 log prob.얻기 80 | log_probs = model(context_var) 81 | 82 | #Step4: log function 사용하기 83 | loss = loss_function(log_probs, autograd.Variable( 84 | torch.LongTensor([word_to_ix[target]]))) 85 | 86 | #Step5: 백프로게이션 실행 후 그레디언트 수치 업데이트 87 | loss.backward() 88 | optimizer.step() 89 | 90 | total_loss += loss.data 91 | losses.append(total_loss) 92 | print(losses) 93 | 94 | 95 | """Exercise: CBow""" 96 | #.view() check 97 | CONTEXT_SIZE = 2 # 2 words to the left, 2 to the right 98 | raw_text = """We are about to study the idea of a computational process. 99 | Computational processes are abstract beings that inhabit computers. 100 | As they evolve, processes manipulate other abstract things called data. 101 | The evolution of a process is directed by a pattern of rules 102 | called a program. People create programs to direct processes. In effect, 103 | we conjure the spirits of the computer with our spells.""".split() 104 | 105 | # By deriving a set from `raw_text`, we deduplicate the array 106 | vocab = set(raw_text) 107 | vocab_size = len(vocab) 108 | 109 | word_to_ix = {word: i for i, word in enumerate(vocab)} 110 | data = [] 111 | 112 | for i in range(2, len(raw_text) - 2): 113 | context = [raw_text[i - 2], raw_text[i - 1], 114 | raw_text[i + 1], raw_text[i + 2]] 115 | target = raw_text[i] 116 | data.append((context, target)) 117 | 118 | class CBOW(nn.Module): 119 | 120 | def __init__(self): 121 | pass 122 | 123 | def forward(self, inputs): 124 | pass 125 | 126 | def make_context_vector(context, word_to_ix): 127 | idxs = [word_to_ix[w] for w in context] 128 | tensor = torch.LongTensor(idxs) 129 | return autograd.Variable(tensor) 130 | 131 | make_context_vector(data[0][0], word_to_ix) 132 | 133 | 134 | 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /pytorch_basic/pytorch_seq2seq(LSTM).py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Jun 10 18:06:39 2017 5 | # Author: Robert Guthrie 6 | http://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html 7 | """ 8 | 9 | import torch 10 | import torch.autograd as autograd 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | 15 | torch.manual_seed(1) 16 | 17 | lstm = nn.LSTM(3,3) #Input dim, output dim (3,3) 18 | inputs = [autograd.Variable(torch.randn((1, 3))) 19 | for _ in range(5)] # make a sequence of length 5 20 | 21 | #hidden state 초기화 22 | hidden = (autograd.Variable(torch.randn(1,1,3)), 23 | autograd.Variable(torch.randn(1,1,3))) 24 | 25 | for i in inputs: 26 | # Step through the sequence one elements at a time. 27 | # after each step, hidden contains the hidden state 28 | out, hidden = lstm(i.view(1,1,-1), hidden) 29 | 30 | # 전체 seq.를 한번에 진행이 가능하다. 31 | # LSTM에서 받은 첫번째 값은 32 | # 두번째는 가장 최근의 hidden state이다. 33 | # 그 이유는, "out"은 모든 hidden state 차례대로 접근 할 수 있고, 34 | # "hidden"은 seq를 진행하며 backprop을 하게 해주기 때문이다. 35 | inputs = torch.cat(inputs).view(len(inputs), 1, -1) 36 | hidden = (autograd.Variable(torch.randn(1, 1, 3)), autograd.Variable( 37 | torch.randn(1,1,3))) 38 | out. 
hidden = lstm(inputs, hidden) 39 | print(out) 40 | print(hidden) 41 | 42 | """LSTM for POS Tagging 43 | 44 | """ 45 | 46 | def prepare_sequence(seq, to_ix): 47 | idxs = [to_ix[w] for w in seq] 48 | tensor = torch.LongTensor(idxs) 49 | return autograd.Variable(tensor) 50 | 51 | training_data = [ 52 | ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]), 53 | ("Everybody read that book".split(), ["NN", "V", "DET", "NN"]) 54 | ] 55 | 56 | word_to_ix = {} 57 | for sent, tags in training_data: 58 | for word in sent: 59 | if word not in word_to_ix: 60 | word_to_ix[word] = len(word_to_ix) 61 | 62 | tag_to_ix = {"DET": 0, "NN": 1, "V": 2} 63 | 64 | #일반적으로 약 32~64 차원이지만, 값을 적게하여 학습이 진행 되면 값이 어떻게 보내는지 체크 65 | EMBEDDING_DIM = 6 66 | HIDDEN_DIM = 6 67 | 68 | #Create the Model 69 | class LSTMTagger(nn.Module): 70 | 71 | def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size): 72 | super(LSTMTagger, self).__init__() 73 | self.hidden_dim = hidden_dim 74 | 75 | self.word_embeddings = nn.Embedding(vocab_size, embedding_dim) 76 | 77 | #LSTM -> input: word embeddings / output: hidden state / dim: hidden_dim 78 | self.lstm = nn.LSTM(embedding_dim, hidden_dim) 79 | 80 | #linear layer는 hidden에서 tag공간으로 변경 81 | self.hidden2tag = nn.Linear(hidden_dim, tagset_size) 82 | self.hidden = self.init_hidden() 83 | 84 | def init_hidden(self): 85 | # The axes semantics are (num_layers, mini_batch_size, hidden_dim) 86 | return (autograd.Variable(torch.zeros(1,1, self.hidden_dim)), 87 | autograd.Variable(torch.zeros(1,1, self.hidden_dim))) 88 | 89 | def forward(self, sentence): 90 | embeds = self.word_embeddings(sentence) 91 | lstm_out, self.hidden = self.lstm( 92 | embeds.view(len(sentence), 1, -1), self.hidden) 93 | tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1)) 94 | tag_scores = F.log_softmax(tag_space) 95 | tag_scores = F.log_softmax(tag_space) 96 | return tag_scores 97 | 98 | #Training Model 99 | model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix)) 100 | loss_function = nn.NLLLoss() 101 | optimizer = optim.SGD(model.parameters(), lr=0.1) 102 | 103 | #학습 전에 성능을 확인해보자 - i: word / j: tag 104 | inputs = prepare_sequence(training_data[0][0], word_to_ix) 105 | tag_scores = model(inputs) 106 | print(tag_scores) 107 | 108 | for epoch in range(300): #toy data이기 때문에 300번만 하는 것, 원래는 그 이상 109 | for sentence, tags in training_data: 110 | #Step1: Pytorch는 gradient를 중첩하는 방식이므로, 각각의 instance들을 명확히 해주는 작업이 필요. 111 | model.zero_grad() 112 | 113 | #또한, hidden state LSTM을 명확히 해주는 것이 필요 114 | #지난 history를 보유하고 있는 instance를 떼어 정보를 공유 115 | model.hidden = model.init_hidden() 116 | 117 | #Step2: input에서 단어의 index형태로 변환시키는 작업 118 | sentence_in = prepare_sequence(sentence, word_to_ix) 119 | targets = prepare_sequence(tags, tag_to_ix) 120 | 121 | #Step3: Run our forward pass. 122 | tag_scores = model(sentence_in) 123 | 124 | #Step4: Compare the loss, gradients, and update the param. by calling optimizer.step() 125 | loss = loss_function(tag_scores, targets) 126 | loss.backward() 127 | optimizer.step() 128 | 129 | #학습 후 점수 확인하기 130 | inputs = prepare_sequence(training_data[0][0], word_to_ix) 131 | tag_scores = model(inputs) 132 | print(tag_scores) 133 | #결과 값을 보면, 예측한 seq는 0 1 2 0 1 (가장 높은 수) 이다. 134 | #문장은 "the dog ate the apple." 135 | #확인해보면, DET, NOUN, VERB, DET, NOUN 이므로 정확한 문장 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /pytorch_basic/sec 6. 
Linear regression wih Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "ename": "NameError", 12 | "evalue": "name 'n' is not defined", 13 | "output_type": "error", 14 | "traceback": [ 15 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 16 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 17 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 18 | "\u001b[0;31mNameError\u001b[0m: name 'n' is not defined" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "import numpy as np\n", 24 | "np.random.seed(1)\n", 25 | "\n", 26 | "x = np.random.rand(n)\n", 27 | "y = x ** np.random.rand(n)\n", 28 | "\n", 29 | "colors = np.random.rand()\n", 30 | "plt.plot(np.unique(x), np.poly1d(np.ployfit(x,y,1))(np.unique(x)))\n", 31 | "\n", 32 | "plt.scatter(x,y, colors, alpha=0.5)\n", 33 | "plt.show()" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 5, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "# Linear Regression model by pytorch" 45 | ] 46 | } 47 | ], 48 | "metadata": { 49 | "anaconda-cloud": {}, 50 | "kernelspec": { 51 | "display_name": "Python [conda root]", 52 | "language": "python", 53 | "name": "conda-root-py" 54 | }, 55 | "language_info": { 56 | "codemirror_mode": { 57 | "name": "ipython", 58 | "version": 3 59 | }, 60 | "file_extension": ".py", 61 | "mimetype": "text/x-python", 62 | "name": "python", 63 | "nbconvert_exporter": "python", 64 | "pygments_lexer": "ipython3", 65 | "version": "3.5.2" 66 | } 67 | }, 68 | "nbformat": 4, 69 | "nbformat_minor": 1 70 | } 71 | -------------------------------------------------------------------------------- /pytorch_basic/text_loader.py: -------------------------------------------------------------------------------- 1 | # References 2 | # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py 3 | # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class 4 | import gzip 5 | from torch.utils.data import Dataset, DataLoader 6 | 7 | 8 | class TextDataset(Dataset): 9 | # Initialize your data, download, etc. 
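# (Added note: __getitem__ below returns a (src, target) pair of strings; target is a raw
#  line from shakespeare.txt.gz and src is the same line lower-cased with spaces stripped,
#  i.e. the loader is set up for a character-level task of restoring spacing and casing.)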
10 | 11 | def __init__(self, filename="./data/shakespeare.txt.gz"): 12 | self.len = 0 13 | with gzip.open(filename, 'rt') as f: 14 | self.targetLines = [x.strip() for x in f if x.strip()] 15 | self.srcLines = [x.lower().replace(' ', '') 16 | for x in self.targetLines] 17 | self.len = len(self.srcLines) 18 | 19 | def __getitem__(self, index): 20 | return self.srcLines[index], self.targetLines[index] 21 | 22 | def __len__(self): 23 | return self.len 24 | 25 | 26 | # Test the loader 27 | if __name__ == "__main__": 28 | dataset = TextDataset() 29 | train_loader = DataLoader(dataset=dataset, 30 | batch_size=3, 31 | shuffle=True, 32 | num_workers=2) 33 | 34 | for i, (src, target) in enumerate(train_loader): 35 | print(i, "data", src) --------------------------------------------------------------------------------
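# Illustrative decoding helper for pytorch_basic/pytorch_seq2seq(LSTM).py above: that
# script prints raw tag_scores and notes in a comment that the argmax sequence 0 1 2 0 1
# corresponds to DET, NN, V, DET, NN. This sketch (not part of the repo) maps the scores
# back to tag names; it assumes the same tag_to_ix = {"DET": 0, "NN": 1, "V": 2} and the
# old Variable-era tensor API used throughout that file.
import torch

def decode_tags(tag_scores, tag_to_ix):
    # tag_scores: (seq_len, num_tags) log-probabilities from the tagger's forward pass
    ix_to_tag = {ix: tag for tag, ix in tag_to_ix.items()}
    _, best = torch.max(tag_scores.data, 1)   # index of the best-scoring tag per word
    return [ix_to_tag[int(ix)] for ix in best.view(-1)]

# e.g. decode_tags(model(prepare_sequence("The dog ate the apple".split(), word_to_ix)), tag_to_ix)
# should give ['DET', 'NN', 'V', 'DET', 'NN'] after the 300-epoch training loop in that script.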