├── .DS_Store ├── .gitignore ├── Algorithm ├── .ipynb_checkpoints │ ├── HelloCoding_Algo-checkpoint.ipynb │ └── algo_basic-checkpoint.ipynb ├── 06-01_calc.py ├── Algo_basic.py ├── HelloCoding_Algo.ipynb ├── IsPrime.py ├── algo_basic.ipynb ├── binary_search_1.py ├── euler_prob1.py ├── graph_algo.py └── selection_sort_2.py ├── Chatbot ├── .ipynb_checkpoints │ ├── qa_chatbot-checkpoint.ipynb │ └── seq2seq-checkpoint.ipynb ├── Slack_Bot │ ├── .Rhistory │ ├── __init__.py │ ├── __pycache__ │ │ └── mcbot_chat.cpython-35.pyc │ ├── data │ │ ├── 29일간의실종.txt │ │ ├── desktop.ini │ │ ├── 게리온의무리들.txt │ │ ├── 게임의종말.txt │ │ ├── 경찰청사람들.txt │ │ ├── 사랑과욕망의덫.txt │ │ ├── 연산군.txt │ │ ├── 욕망이타는숲.txt │ │ ├── 위대한개츠비.txt │ │ └── 황야의이리.txt │ ├── lstm_bot.py │ ├── markov-toji.json │ ├── markov_chain_bot.py │ ├── mcbot_chat.py │ ├── modubot.py │ ├── print_bot_id.py │ ├── toji.model │ ├── toji.wakati │ └── toji2.json ├── __init__.py ├── __pycache__ │ └── helpers.cpython-36.pyc ├── helpers.py ├── seq2seq.ipynb └── seq2seq_tut.py ├── Dataset └── dataset.md ├── ETC ├── Lec1.txt ├── Lec4.txt └── Subtitle_tools.ipynb ├── HTML └── code_academy.html ├── Kaggle ├── Quora │ └── Quora_shin.ipynb └── What_Cooking │ └── Cooking.ipynb ├── Keras_Basic ├── .ipynb_checkpoints │ ├── Keras_tutorial_imdb_text_classification-checkpoint.ipynb │ └── Keras_tutorial_text_generation-checkpoint.ipynb ├── Keras_Cheat_Sheet_Python.pdf ├── Keras_basic_fin.py ├── Keras_classification_basic.py ├── Keras_fine_tuning_basic.py ├── Keras_tutorial_imdb_text_classification.ipynb └── Keras_tutorial_text_generation.ipynb ├── Machine_Comprehension ├── Attention_Keras │ ├── .Rhistory │ ├── Attention_Keras_QA_Model.py │ ├── CNNQA_architecture.json │ ├── CNNQA_weights.h5.7z │ ├── Glove.py │ ├── KerasQA.ods │ └── embedding_data.h5 ├── DMN_QA │ ├── DynamicMemoryNetwork.pdf │ ├── bAbi.pdf │ ├── dataset │ │ └── babi_tasks_1-20_v1-2.tar.gz │ ├── image │ │ ├── algo_process1.png │ │ ├── algo_process2.png │ │ ├── algo_process3.png │ │ └── babi_dataset.png │ └── qa_chatbot.ipynb └── Readme_MC.md ├── Math_Stat ├── .Rhistory ├── .ipynb_checkpoints │ ├── ML_Basic_Siraj Raval-checkpoint.ipynb │ └── support_vector_machine_lesson-checkpoint.ipynb ├── ML_Basic_Siraj Raval.ipynb ├── Readme.md ├── data.csv └── support_vector_machine_lesson.ipynb ├── Natural Language Generation └── lstm_keras_generation.py ├── Python ├── .ipynb_checkpoints │ ├── Python_Data_Science-Matplotlib-checkpoint.ipynb │ ├── Python_Data_science_toolbox_part1-checkpoint.ipynb │ └── Python_Review-checkpoint.ipynb ├── Cheat_Sheet.py ├── Decorator.py ├── OOP.ipynb ├── Python_Data_Science-Matplotlib.ipynb ├── Python_Data_science_toolbox_part1.ipynb ├── Python_Review.ipynb ├── Visualization │ ├── .ipynb_checkpoints │ │ └── Bokeh-checkpoint.ipynb │ └── Bokeh.ipynb └── attribute.py ├── Quora_insincere ├── .gitignore ├── README.md ├── jupyter_examples │ ├── Data_Prepro.ipynb │ ├── Modeling.ipynb │ ├── data_preprocessing.py │ ├── lstm_kernel_shin.ipynb │ ├── lstm_kernel_simple.ipynb │ ├── test_kernel1.ipynb │ └── test_kernel_ryan.ipynb └── lstm.py ├── Readme.md ├── Tensorflow ├── .gitignore ├── 04_word2vec_eager.py ├── Chatbot_Attention.ipynb ├── TF_README.md ├── nmt_with_attention.ipynb ├── nmt_with_attention_chatbot_kor.ipynb ├── standford_example │ ├── 02_lazy_loading.py │ ├── 02_placeholder.py │ ├── 02_simple_tf.py │ ├── 02_variables.py │ ├── 03_linreg_dataset.py │ ├── 03_linreg_placeholder.py │ ├── 03_linreg_starter.py │ ├── 03_logreg.py │ ├── 03_logreg_placeholder.py │ ├── 03_logreg_starter.py │ ├── 
04_linreg_eager.py │ ├── 04_linreg_eager_starter.py │ ├── 04_word2vec.py │ ├── 04_word2vec_eager.py │ ├── 04_word2vec_eager_starter.py │ ├── 04_word2vec_visualize.py │ ├── 05_randomization.py │ ├── 05_variable_sharing.py │ ├── 07_convnet_layers.py │ ├── 07_convnet_mnist.py │ ├── 07_convnet_mnist_starter.py │ ├── 07_run_kernels.py │ ├── 11_char_rnn.py │ ├── kernels.py │ └── word2vec_utils.py └── tf_eagar(Define by run) 튜토리얼.ipynb ├── Text_Classification ├── .DS_Store ├── .ipynb_checkpoints │ ├── Bag of Words Meets Bags of Popcorn-checkpoint.ipynb │ ├── bagofwords_text_classficiation_kaggle-checkpoint.ipynb │ ├── cnn_textclassification_keras-checkpoint.ipynb │ └── seq2seq_keras-checkpoint.ipynb ├── To_Do │ ├── cnn_keras_text_class_kor.py │ ├── hierachical_attention_keras_text_class_eng.py │ ├── lstm_keras_text_class_eng.py │ └── rnn_attention_keras_text_class_eng .py ├── bagofwords_text_classficiation_kaggle.ipynb ├── char_text_classification_keras.py ├── cnn_keras_text_class_imdb.py ├── cnn_keras_text_class_imdb2.py ├── cnn_keras_text_class_imdb2_korean.py ├── cnn_keras_text_class_kaggle_eng.py ├── cnn_pytorch_text_class_kaggle_eng.py ├── nets │ ├── __pycache__ │ │ └── text_cnn.cpython-35.pyc │ └── text_cnn.py └── seq2seq_keras.ipynb ├── VAE └── vae_sample.py ├── dataset └── .DS_Store └── pytorch_basic ├── .ipynb_checkpoints ├── 10.pytorch_rnn-checkpoint.ipynb ├── Pytorch_basic-checkpoint.ipynb └── sec 6. Linear regression wih Python-checkpoint.ipynb ├── 10.pytorch_rnn.ipynb ├── Pytorch Seq2Seq.ipynb ├── Pytorch.ipynb ├── Pytorch_basic.ipynb ├── Pytorch_mnist.ipynb ├── Start_Pytorch.ipynb ├── cnn_cifar10_pytorch.py ├── cnn_text_pytorch.py ├── pytorch_basic.py ├── pytorch_nlp.py ├── pytorch_nlp2.py ├── pytorch_nlp3.py ├── pytorch_seq2seq(LSTM).py ├── sec 6. 
Linear regression wih Python.ipynb ├── seq2seq_models.py └── text_loader.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### JetBrains template 3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 5 | 6 | # User-specific stuff: 7 | .idea/**/workspace.xml 8 | .idea/**/tasks.xml 9 | .idea/dictionaries 10 | 11 | # Sensitive or high-churn files: 12 | .idea/**/dataSources/ 13 | .idea/**/dataSources.ids 14 | .idea/**/dataSources.xml 15 | .idea/**/dataSources.local.xml 16 | .idea/**/sqlDataSources.xml 17 | .idea/**/dynamic.xml 18 | .idea/**/uiDesigner.xml 19 | 20 | # Gradle: 21 | .idea/**/gradle.xml 22 | .idea/**/libraries 23 | 24 | # CMake 25 | cmake-build-debug/ 26 | 27 | # Mongo Explorer plugin: 28 | .idea/**/mongoSettings.xml 29 | 30 | ## File-based project format: 31 | *.iws 32 | 33 | ## Plugin-specific files: 34 | 35 | # IntelliJ 36 | out/ 37 | 38 | # mpeltonen/sbt-idea plugin 39 | .idea_modules/ 40 | 41 | # JIRA plugin 42 | atlassian-ide-plugin.xml 43 | 44 | # Cursive Clojure plugin 45 | .idea/replstate.xml 46 | 47 | # Crashlytics plugin (for Android Studio and IntelliJ) 48 | com_crashlytics_export_strings.xml 49 | crashlytics.properties 50 | crashlytics-build.properties 51 | fabric.properties 52 | ### Python template 53 | # Byte-compiled / optimized / DLL files 54 | __pycache__/ 55 | *.py[cod] 56 | *$py.class 57 | 58 | # C extensions 59 | *.so 60 | 61 | # Distribution / packaging 62 | .Python 63 | build/ 64 | develop-eggs/ 65 | dist/ 66 | downloads/ 67 | eggs/ 68 | .eggs/ 69 | lib/ 70 | lib64/ 71 | parts/ 72 | sdist/ 73 | var/ 74 | wheels/ 75 | *.egg-info/ 76 | .installed.cfg 77 | *.egg 78 | MANIFEST 79 | 80 | # PyInstaller 81 | # Usually these files are written by a python script from a template 82 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
83 | *.manifest 84 | *.spec 85 | 86 | # Installer logs 87 | pip-log.txt 88 | pip-delete-this-directory.txt 89 | 90 | # Unit test / coverage reports 91 | htmlcov/ 92 | .tox/ 93 | .coverage 94 | .coverage.* 95 | .cache 96 | nosetests.xml 97 | coverage.xml 98 | *.cover 99 | .hypothesis/ 100 | 101 | # Translations 102 | *.mo 103 | *.pot 104 | 105 | # Django stuff: 106 | *.log 107 | .static_storage/ 108 | .media/ 109 | local_settings.py 110 | 111 | # Flask stuff: 112 | instance/ 113 | .webassets-cache 114 | 115 | # Scrapy stuff: 116 | .scrapy 117 | 118 | # Sphinx documentation 119 | docs/_build/ 120 | 121 | # PyBuilder 122 | target/ 123 | 124 | # Jupyter Notebook 125 | .ipynb_checkpoints 126 | 127 | # pyenv 128 | .python-version 129 | 130 | # celery beat schedule file 131 | celerybeat-schedule 132 | 133 | # SageMath parsed files 134 | *.sage.py 135 | 136 | # Environments 137 | .env 138 | .venv 139 | env/ 140 | venv/ 141 | ENV/ 142 | env.bak/ 143 | venv.bak/ 144 | 145 | # Spyder project settings 146 | .spyderproject 147 | .spyproject 148 | 149 | # Rope project settings 150 | .ropeproject 151 | 152 | # mkdocs documentation 153 | /site 154 | 155 | # mypy 156 | .mypy_cache/ 157 | ### macOS template 158 | # General 159 | .DS_Store 160 | .AppleDouble 161 | .LSOverride 162 | 163 | # Icon must end with two \r 164 | Icon 165 | 166 | # Thumbnails 167 | ._* 168 | 169 | # Files that might appear in the root of a volume 170 | .DocumentRevisions-V100 171 | .fseventsd 172 | .Spotlight-V100 173 | .TemporaryItems 174 | .Trashes 175 | .VolumeIcon.icns 176 | .com.apple.timemachine.donotpresent 177 | 178 | # Directories potentially created on remote AFP share 179 | .AppleDB 180 | .AppleDesktop 181 | Network Trash Folder 182 | Temporary Items 183 | .apdisk 184 | 185 | .idea/ 186 | data_in/ 187 | data_out/* 188 | checkpoint/ 189 | logs/ 190 | OLD/ 191 | practice/ 192 | scala_data_pre/ 193 | target/ 194 | .vscode/ 195 | .ipynb_checkpoints/ 196 | .DS_Store 197 | .DS_Store* 198 | my_test_model/ 199 | result/ 200 | blog_origin.pkl 201 | -------------------------------------------------------------------------------- /Algorithm/.ipynb_checkpoints/HelloCoding_Algo-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1.원래 저장위치에서 문자열을 역순으로 변환하기\n", 8 | "\n", 9 | "문자열 역순 변환\n", 10 | "- 문자열의 길이 알아내기\n", 11 | "- 문자열의 첫번째문자와 마지막 문자 교환\n", 12 | "- 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n", 13 | "\n", 14 | "위의 절차를 반복 (유니코드 한글에 대한 주의)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "#문자열 뒤집기 알고리즘\n", 26 | "\n", 27 | "#Sample String\n", 28 | "Sam_string = 'ABCD'" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "'DCBA'" 42 | ] 43 | }, 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "#Python은 심플하게 끝남\n", 51 | "\n", 52 | "def reverseString(str):\n", 53 | " return str[::-1]\n", 54 | "\n", 55 | "reverseString(Sam_string)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# 스택 (접착지 메모)\n", 63 | "\n", 64 | "- 스택에는 푸시와 팝이라는 두가지 연산\n", 65 | "- 모든 함수 호출은 호출 스택을 사용\n", 66 | "- 호출 스택은 너무 켜져 메모리를 크게 소모 할 수 도 있음" 67 | ] 68 | }, 69 | { 
70 | "cell_type": "code", 71 | "execution_count": 9, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "'D'" 80 | ] 81 | }, 82 | "execution_count": 9, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "#Stack을 사용해 보자\n", 89 | "#Sam_string\n", 90 | "# 문자열의 길이 알아내기\n", 91 | "# 문자열의 첫번째문자와 마지막 문자 교환\n", 92 | "# 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n", 93 | "\n", 94 | "len(Sam_string)\n", 95 | "\n", 96 | "Sam_string[0]\n", 97 | "Sam_string[-1]\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 16, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "def reverseString2(str):\n", 109 | " stack = []\n", 110 | " for ch in str:\n", 111 | " stack.append(ch)\n", 112 | " \n", 113 | " result = \"\"\n", 114 | " while len(stack) > 0:\n", 115 | " result += stack.pop()\n", 116 | " \n", 117 | " return result\n", 118 | "\n" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "# 선택 정렬" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": { 132 | "collapsed": true 133 | }, 134 | "outputs": [], 135 | "source": [] 136 | } 137 | ], 138 | "metadata": { 139 | "anaconda-cloud": {}, 140 | "kernelspec": { 141 | "display_name": "Python [conda root]", 142 | "language": "python", 143 | "name": "conda-root-py" 144 | }, 145 | "language_info": { 146 | "codemirror_mode": { 147 | "name": "ipython", 148 | "version": 3 149 | }, 150 | "file_extension": ".py", 151 | "mimetype": "text/x-python", 152 | "name": "python", 153 | "nbconvert_exporter": "python", 154 | "pygments_lexer": "ipython3", 155 | "version": "3.5.2" 156 | } 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 1 160 | } 161 | -------------------------------------------------------------------------------- /Algorithm/.ipynb_checkpoints/algo_basic-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1. Bubble Sort\n", 8 | "\n", 9 | "* performance: O(n^2)\n", 10 | "* space complexity O(1)\n", 11 | "\n", 12 | "Procedure: \n", 13 | "Loop1 \n", 14 | "6,5,3,1 / 5,6,3,1 / 5,3,6,1 /5,3,1,6 \n", 15 | "Loop2 \n", 16 | "3,5,1,6 / 3,1,5,6 / 3,1,5,6 \n", 17 | "Loop3 \n", 18 | "1,3,5,6 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "[1, 2, 3, 4, 5, 6]\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "import unittest\n", 38 | "\n", 39 | "def bubblesort(alist):\n", 40 | " for i in range(len(alist)-1):\n", 41 | " for j in range(len(alist)-1):\n", 42 | " if alist[j] > alist[j+1]:\n", 43 | " alist[j], alist[j+1] = alist[j+1], alist[j]\n", 44 | " return alist\n", 45 | " \n", 46 | "sort = [4,6,1,3,5,2]\n", 47 | "print(bubblesort(sort))\n", 48 | "\n", 49 | "class unit_test(unittest.TestCase):\n", 50 | " def test(self):\n", 51 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([4, 6, 1, 3, 5, 2]))\n", 52 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 4, 3, 1, 2, 5]))\n", 53 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 5, 4, 3, 2, 1]))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "# 2. 
Selection Sort\n", 61 | "\n", 62 | "* Worst case performance: O(n^2)\n", 63 | "* Best Case perform: O(n^2)\n", 64 | "* Avg. Case perform: O(n^2)\n", 65 | "* Worst case space complexity: O(n) total, O(1) auxilary\n", 66 | "\n", 67 | "Procedure: \n", 68 | "4,6,1,3,5,2 \n", 69 | "Min: 4 (첫 번째 포인트) \n", 70 | "4,6,1,3,5,2 \n", 71 | "Min: 1 \n", 72 | "\n", 73 | "1,6,4,3,5,2 \n", 74 | "Min: 6 (두 번째 포인트) \n", 75 | "1,6,4,3,5,2 \n", 76 | "Min: 2 \n", 77 | "\n", 78 | "1,2,4,3,5,6 \n", 79 | "Min: 4 (세 번째 포인트) " 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# 3. Insertion Sort\n", 87 | "\n", 88 | "쉽지만 성능이 낮음\n", 89 | "\n", 90 | "Procedure: \n", 91 | "4,6,1,3,5,2 \n", 92 | "\n", 93 | "4,6 \n", 94 | "4,1,6 \n", 95 | "1,4,6 \n", 96 | "\n", 97 | "1,4,6,3 \n", 98 | "1,4,3,6 \n", 99 | "1,3,4,6 \n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# 4. Merge Sort\n", 107 | "\n", 108 | "Perform: O(nlogn)\n", 109 | "space complex: O(n)\n", 110 | "\n", 111 | "1. 정렬되지 않은 리스트를 지속적으로 쪼갠다\n", 112 | "2. 정렬된 아이템과 병합한다.\n", 113 | "\n", 114 | "Procedure: \n", 115 | "6,2,4,1,3,6,5,8\n", 116 | "\n", 117 | "Step1: \n", 118 | "6,2,4,1 / 3,7,5,8 \n", 119 | "* 6,2,4,1\n", 120 | "\n", 121 | "6,2 / 4,1 \n", 122 | "6|2 / 4|1\n", 123 | "\n", 124 | "Step2:\n", 125 | "2|6 -> 2,6 -> 1|4 -> 1,4\n", 126 | "2,6 / 1,4 -> 1,2,4,6\n", 127 | "\n", 128 | "Step3:\n", 129 | "3/7 / 5/8\n", 130 | "\n" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "anaconda-cloud": {}, 136 | "kernelspec": { 137 | "display_name": "Python [conda root]", 138 | "language": "python", 139 | "name": "conda-root-py" 140 | }, 141 | "language_info": { 142 | "codemirror_mode": { 143 | "name": "ipython", 144 | "version": 3 145 | }, 146 | "file_extension": ".py", 147 | "mimetype": "text/x-python", 148 | "name": "python", 149 | "nbconvert_exporter": "python", 150 | "pygments_lexer": "ipython3", 151 | "version": "3.5.2" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 1 156 | } 157 | -------------------------------------------------------------------------------- /Algorithm/06-01_calc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Jul 29 16:01:08 2017 5 | 6 | @author: ryan 7 | 8 | #탑코더 06 알고리즘 9 | """ 10 | 11 | A = [] 12 | B = [] 13 | C = [] 14 | 15 | n = 5 16 | m = 5 17 | 18 | def calc(n, m): 19 | A 20 | for i in range(n): 21 | B 22 | i += 1 23 | print("count {} times B called".format(i)) 24 | for j in range(m): 25 | C 26 | j += 1 27 | print("count {} times C called".format(j)) 28 | 29 | 30 | calc(n, m) 31 | 32 | #계산량은 O(nm) 33 | #Top Coder 에서는 10^7 까지는 괜찮지만 10^8을 넘으면 안됨 34 | -------------------------------------------------------------------------------- /Algorithm/Algo_basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Jun 12 00:08:58 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | """ 10 | Bubble Sort 11 | performance: O(n^2) 12 | space complexity O(1) 13 | 14 | Procedure: 15 | Loop1 16 | 6,5,3,1 / 5,6,3,1 / 5,3,6,1 /5,3,1,6 17 | Loop2 18 | 3,5,1,6 / 3,1,5,6 / 3,1,5,6 19 | Loop3 20 | 1,3,5,6 21 | """ 22 | 23 | import unittest 24 | 25 | def bubblesort(alist): 26 | for i in range(len(alist)-1): 27 | for j in range(len(alist)-1): 28 | if alist[j] > alist[j+1]: 29 | alist[j], alist[j+1] = alist[j+1], alist[j] 30 | return 
alist 31 | 32 | sort = [4,6,1,3,5,2] 33 | bubblesort(sort) 34 | 35 | class unit_test(unittest.TestCase): 36 | def test(self): 37 | self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([4, 6, 1, 3, 5, 2])) 38 | self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 4, 3, 1, 2, 5])) 39 | self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 5, 4, 3, 2, 1])) 40 | 41 | 42 | -------------------------------------------------------------------------------- /Algorithm/HelloCoding_Algo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1.원래 저장위치에서 문자열을 역순으로 변환하기\n", 8 | "\n", 9 | "문자열 역순 변환\n", 10 | "- 문자열의 길이 알아내기\n", 11 | "- 문자열의 첫번째문자와 마지막 문자 교환\n", 12 | "- 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n", 13 | "\n", 14 | "위의 절차를 반복 (유니코드 한글에 대한 주의)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "#문자열 뒤집기 알고리즘\n", 26 | "\n", 27 | "#Sample String\n", 28 | "Sam_string = 'ABCD'" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "'DCBA'" 42 | ] 43 | }, 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "#Python은 심플하게 끝남\n", 51 | "\n", 52 | "def reverseString(str):\n", 53 | " return str[::-1]\n", 54 | "\n", 55 | "reverseString(Sam_string)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# 스택 (접착지 메모)\n", 63 | "\n", 64 | "- 스택에는 푸시와 팝이라는 두가지 연산\n", 65 | "- 모든 함수 호출은 호출 스택을 사용\n", 66 | "- 호출 스택은 너무 켜져 메모리를 크게 소모 할 수 도 있음" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 9, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "'D'" 80 | ] 81 | }, 82 | "execution_count": 9, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "#Stack을 사용해 보자\n", 89 | "#Sam_string\n", 90 | "# 문자열의 길이 알아내기\n", 91 | "# 문자열의 첫번째문자와 마지막 문자 교환\n", 92 | "# 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n", 93 | "\n", 94 | "len(Sam_string)\n", 95 | "\n", 96 | "Sam_string[0]\n", 97 | "Sam_string[-1]\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 16, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "def reverseString2(str):\n", 109 | " stack = []\n", 110 | " for ch in str:\n", 111 | " stack.append(ch)\n", 112 | " \n", 113 | " result = \"\"\n", 114 | " while len(stack) > 0:\n", 115 | " result += stack.pop()\n", 116 | " \n", 117 | " return result\n", 118 | "\n" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "# 선택 정렬" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": { 132 | "collapsed": true 133 | }, 134 | "outputs": [], 135 | "source": [] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## 1.1 문자열에 포함된 문자들이 전부 유일한지 검사 하는 알고리즘\n", 142 | "\n", 143 | "https://www.youtube.com/watch?v=piDwgBqmqKM&list=PLVNY1HnUlO24RlncfRjfoZHnD0YWVsvhq" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 1, 149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | 
"test1 = 'ABCD' #True\n", 155 | "test2 = 'ABAD' #False" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "fkdls;s;skfkfld;sfksld;sjflldldls;a'fkdls;dldldldldls;s;dldlfjdk'" 167 | ] 168 | } 169 | ], 170 | "metadata": { 171 | "anaconda-cloud": {}, 172 | "kernelspec": { 173 | "display_name": "Python [conda root]", 174 | "language": "python", 175 | "name": "conda-root-py" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.5.2" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 1 192 | } 193 | -------------------------------------------------------------------------------- /Algorithm/IsPrime.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 18 00:22:29 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | #Check whether Prime number or not 10 | 11 | def isPrime(num): 12 | if num > 0: 13 | 14 | if (num % 2) != 0: 15 | print("{} is prime num".format(num)) 16 | else: 17 | print("{} is not prime num".format(num)) 18 | 19 | else: 20 | print("input value must be greater than zero") 21 | 22 | a = -3 23 | 24 | isPrime(a) -------------------------------------------------------------------------------- /Algorithm/algo_basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1. Bubble Sort\n", 8 | "\n", 9 | "* performance: O(n^2)\n", 10 | "* space complexity O(1)\n", 11 | "\n", 12 | "Procedure: \n", 13 | "Loop1 \n", 14 | "6,5,3,1 / 5,6,3,1 / 5,3,6,1 /5,3,1,6 \n", 15 | "Loop2 \n", 16 | "3,5,1,6 / 3,1,5,6 / 3,1,5,6 \n", 17 | "Loop3 \n", 18 | "1,3,5,6 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "[1, 2, 3, 4, 5, 6]\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "import unittest\n", 38 | "\n", 39 | "def bubblesort(alist):\n", 40 | " for i in range(len(alist)-1):\n", 41 | " for j in range(len(alist)-1):\n", 42 | " if alist[j] > alist[j+1]:\n", 43 | " alist[j], alist[j+1] = alist[j+1], alist[j]\n", 44 | " return alist\n", 45 | " \n", 46 | "sort = [4,6,1,3,5,2]\n", 47 | "print(bubblesort(sort))\n", 48 | "\n", 49 | "class unit_test(unittest.TestCase):\n", 50 | " def test(self):\n", 51 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([4, 6, 1, 3, 5, 2]))\n", 52 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 4, 3, 1, 2, 5]))\n", 53 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 5, 4, 3, 2, 1]))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "# 2. Selection Sort\n", 61 | "\n", 62 | "* Worst case performance: O(n^2)\n", 63 | "* Best Case perform: O(n^2)\n", 64 | "* Avg. 
Case perform: O(n^2)\n", 65 | "* Worst case space complexity: O(n) total, O(1) auxilary\n", 66 | "\n", 67 | "Procedure: \n", 68 | "4,6,1,3,5,2 \n", 69 | "Min: 4 (첫 번째 포인트) \n", 70 | "4,6,1,3,5,2 \n", 71 | "Min: 1 \n", 72 | "\n", 73 | "1,6,4,3,5,2 \n", 74 | "Min: 6 (두 번째 포인트) \n", 75 | "1,6,4,3,5,2 \n", 76 | "Min: 2 \n", 77 | "\n", 78 | "1,2,4,3,5,6 \n", 79 | "Min: 4 (세 번째 포인트) " 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# 3. Insertion Sort\n", 87 | "\n", 88 | "쉽지만 성능이 낮음\n", 89 | "\n", 90 | "Procedure: \n", 91 | "4,6,1,3,5,2 \n", 92 | "\n", 93 | "4,6 \n", 94 | "4,1,6 \n", 95 | "1,4,6 \n", 96 | "\n", 97 | "1,4,6,3 \n", 98 | "1,4,3,6 \n", 99 | "1,3,4,6 \n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# 4. Merge Sort\n", 107 | "\n", 108 | "Perform: O(nlogn)\n", 109 | "space complex: O(n)\n", 110 | "\n", 111 | "1. 정렬되지 않은 리스트를 지속적으로 쪼갠다\n", 112 | "2. 정렬된 아이템과 병합한다.\n", 113 | "\n", 114 | "Procedure: \n", 115 | "6,2,4,1,3,6,5,8\n", 116 | "\n", 117 | "Step1: \n", 118 | "6,2,4,1 / 3,7,5,8 \n", 119 | "* 6,2,4,1\n", 120 | "\n", 121 | "6,2 / 4,1 \n", 122 | "6|2 / 4|1\n", 123 | "\n", 124 | "Step2: \n", 125 | "2|6 -> 2,6 -> 1|4 -> 1,4 \n", 126 | "2,6 / 1,4 -> 1,2,4,6\n", 127 | "\n", 128 | "Step3: \n", 129 | "3|7 / 5|8\n", 130 | "\n", 131 | "Step4: \n", 132 | "3|7 -> 3,7 -> 5|8 -> 5,8 \n", 133 | "3,7 / 5,8 -> 3,5,7,8\n", 134 | "\n", 135 | "Step5: \n", 136 | "1,2,4,6 / 3,5,7,8 -> 1,2,3,4,5,6,7,8\n", 137 | "\n", 138 | "\n", 139 | "\n" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": true 147 | }, 148 | "outputs": [], 149 | "source": [] 150 | } 151 | ], 152 | "metadata": { 153 | "anaconda-cloud": {}, 154 | "kernelspec": { 155 | "display_name": "Python [conda root]", 156 | "language": "python", 157 | "name": "conda-root-py" 158 | }, 159 | "language_info": { 160 | "codemirror_mode": { 161 | "name": "ipython", 162 | "version": 3 163 | }, 164 | "file_extension": ".py", 165 | "mimetype": "text/x-python", 166 | "name": "python", 167 | "nbconvert_exporter": "python", 168 | "pygments_lexer": "ipython3", 169 | "version": "3.5.2" 170 | } 171 | }, 172 | "nbformat": 4, 173 | "nbformat_minor": 1 174 | } 175 | -------------------------------------------------------------------------------- /Algorithm/binary_search_1.py: -------------------------------------------------------------------------------- 1 | #Binary search 2 | 3 | alist = [2,4,6,8,10,14,21] 4 | 5 | exp_val = 14 6 | 7 | #hint1 8 | low = 0 9 | high = len(alist) - 1 10 | mid = (low + high) // 2 11 | guess = alist[mid] 12 | 13 | #hint2 14 | if guess < item: 15 | low = mid + 1 16 | 17 | #hint3: cannot solve 18 | def binarySearch(list, item): 19 | low = 0 20 | high = len(list) - 1 21 | 22 | while low <= high: # Key point 23 | mid = (low + high) // 2 24 | guess = list[mid] 25 | if guess == item: 26 | return mid 27 | elif guess > item: 28 | high = mid - 1 29 | else: 30 | low = mid + 1 31 | return None 32 | 33 | binarySearch(alist, exp_val) 34 | 35 | 36 | 37 | #my code 38 | i = 0 39 | while i > 100: 40 | i += 1 41 | if guess < exp_val: 42 | mid = (mid + high) // 2 43 | guess = alist[mid] 44 | elif guess > exp_val: 45 | mid = (mid + low) // 2 46 | guess = alist[mid] 47 | else: 48 | print("Value Location {}, Value {}".format(mid, guess)) 49 | break 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /Algorithm/euler_prob1.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Algorithm/euler_prob1.py -------------------------------------------------------------------------------- /Algorithm/graph_algo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Algorithm/graph_algo.py -------------------------------------------------------------------------------- /Algorithm/selection_sort_2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 22:49:03 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | -------------------------------------------------------------------------------- /Chatbot/.ipynb_checkpoints/seq2seq-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/.Rhistory: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/.Rhistory -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/__pycache__/mcbot_chat.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/__pycache__/mcbot_chat.cpython-35.pyc -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/data/desktop.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/data/desktop.ini -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/lstm_bot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 20:02:25 2017 5 | 소설 사이트 다운로드:http://blog.naver.com/PostView.nhn?blogId=dmswjd5366&logNo=220010721513 6 | @author: ryan 7 | """ 8 | 9 | import codecs 10 | from bs4 import BeautifulSoup 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Activation, Dropout 13 | from keras.layers import LSTM 14 | from keras.optimizers import RMSprop 15 | from keras.utils.data_utils import get_file 16 | import numpy as np 17 | import random, sys 18 | 19 | import os 20 | import pandas as pd 21 | import json 22 | 23 | #import chardet 24 | 25 | #data = pd.read_csv("/home/ryan/nlp_ryan/Chatbot/Slack_Bot/data/toji1.txt", "r", encoding="utf-8") 26 | fp = codecs.open("/home/ryan/nlp_ryan/Chatbot/Slack_Bot/data/toji1.txt", "r", encoding="utf-8") 27 | #soup = BeautifulSoup(fp, "html.parser") 28 | #body = soup.select_one("body") 29 | 
#text = body.getText() + " " 30 | 31 | print('코퍼스의 길이: ', len(dic)) 32 | # 문자를 하나하나 읽어 들이고 ID 붙이기 33 | chars = sorted(list(set(text))) 34 | print('사용되고 있는 문자의 수:', len(chars)) 35 | char_indices = dict((c, i) for i, c in enumerate(chars)) # 문자 → ID 36 | indices_char = dict((i, c) for i, c in enumerate(chars)) # ID → 문자 37 | # 텍스트를 maxlen개의 문자로 자르고 다음에 오는 문자 등록하기 38 | maxlen = 20 39 | step = 3 40 | sentences = [] 41 | next_chars = [] 42 | for i in range(0, len(text) - maxlen, step): 43 | sentences.append(text[i: i + maxlen]) 44 | next_chars.append(text[i + maxlen]) 45 | print('학습할 구문의 수:', len(sentences)) 46 | print('텍스트를 ID 벡터로 변환합니다...') 47 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 48 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 49 | for i, sentence in enumerate(sentences): 50 | for t, char in enumerate(sentence): 51 | X[i, t, char_indices[char]] = 1 52 | y[i, char_indices[next_chars[i]]] = 1 53 | # 모델 구축하기(LSTM) 54 | print('모델을 구축합니다...') 55 | model = Sequential() 56 | model.add(LSTM(128, input_shape=(maxlen, len(chars)))) 57 | model.add(Dense(len(chars))) 58 | model.add(Activation('softmax')) 59 | optimizer = RMSprop(lr=0.01) 60 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 61 | # 후보를 배열에서 꺼내기 62 | def sample(preds, temperature=1.0): 63 | preds = np.asarray(preds).astype('float64') 64 | preds = np.log(preds) / temperature 65 | exp_preds = np.exp(preds) 66 | preds = exp_preds / np.sum(exp_preds) 67 | probas = np.random.multinomial(1, preds, 1) 68 | return np.argmax(probas) 69 | # 학습시키고 텍스트 생성하기 반복 70 | for iteration in range(1, 60): 71 | print() 72 | print('-' * 50) 73 | print('반복 =', iteration) 74 | model.fit(X, y, batch_size=128, nb_epoch=1) # 75 | # 임의의 시작 텍스트 선택하기 76 | start_index = random.randint(0, len(text) - maxlen - 1) 77 | # 다양한 다양성의 문장 생성 78 | for diversity in [0.2, 0.5, 1.0, 1.2]: 79 | print() 80 | print('--- 다양성 = ', diversity) 81 | generated = '' 82 | sentence = text[start_index: start_index + maxlen] 83 | generated += sentence 84 | print('--- 시드 = "' + sentence + '"') 85 | sys.stdout.write(generated) 86 | # 시드를 기반으로 텍스트 자동 생성 87 | for i in range(400): 88 | x = np.zeros((1, maxlen, len(chars))) 89 | for t, char in enumerate(sentence): 90 | x[0, t, char_indices[char]] = 1. 
91 | # 다음에 올 문자를 예측하기 92 | preds = model.predict(x, verbose=0)[0] 93 | next_index = sample(preds, diversity) 94 | next_char = indices_char[next_index] 95 | # 출력하기 96 | generated += next_char 97 | sentence = sentence[1:] + next_char 98 | sys.stdout.write(next_char) 99 | sys.stdout.flush() 100 | print() -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/markov_chain_bot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 20:02:25 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | import codecs 10 | from bs4 import BeautifulSoup 11 | from konlpy.tag import Twitter 12 | import urllib.request 13 | import os, re, json, random 14 | # 마르코프 체인 딕셔너리 만들기 --- (※1) 15 | def make_dic(words): 16 | tmp = ["@"] 17 | dic = {} 18 | for word in words: 19 | tmp.append(word) 20 | if len(tmp) < 3: continue 21 | if len(tmp) > 3: tmp = tmp[1:] 22 | set_word3(dic, tmp) 23 | if word == ".": 24 | tmp = ["@"] 25 | continue 26 | return dic 27 | # 딕셔너리에 데이터 등록하기 --- (※2) 28 | def set_word3(dic, s3): 29 | w1, w2, w3 = s3 30 | if not w1 in dic: dic[w1] = {} 31 | if not w2 in dic[w1]: dic[w1][w2] = {} 32 | if not w3 in dic[w1][w2]: dic[w1][w2][w3] = 0 33 | dic[w1][w2][w3] += 1 34 | 35 | # 문장 만들기 --- (※3) 36 | def make_sentence(dic): 37 | ret = [] 38 | if not "@" in dic: return "no dic" 39 | top = dic["@"] 40 | w1 = word_choice(top) 41 | w2 = word_choice(top[w1]) 42 | ret.append(w1) 43 | ret.append(w2) 44 | while True: 45 | w3 = word_choice(dic[w1][w2]) 46 | ret.append(w3) 47 | if w3 == ".": break 48 | w1, w2 = w2, w3 49 | ret = "".join(ret) 50 | # 띄어쓰기 51 | params = urllib.parse.urlencode({ 52 | "_callback": "", 53 | "q": ret 54 | }) 55 | # 네이버 맞춤법 검사기를 사용합니다. 56 | data = urllib.request.urlopen("https://m.search.naver.com/p/csearch/dcontent/spellchecker.nhn?" 
+ params) 57 | data = data.read().decode("utf-8")[1:-2] 58 | data = json.loads(data) 59 | data = data["message"]["result"]["html"] 60 | #data = soup = BeautifulSoup(data, "html.parser").getText() 61 | data = BeautifulSoup(data, "html.parser").getText() 62 | 63 | # 리턴 64 | return data 65 | 66 | def word_choice(sel): 67 | keys = sel.keys() 68 | return random.choice(list(keys)) 69 | 70 | # 문장 읽어 들이기 --- (※4) 71 | toji_file = "toji.txt" 72 | dict_file = "/home/ryan/nlp_ryan/Chatbot/Slack_Bot/markov-toji.json" 73 | 74 | if not os.path.exists(dict_file): 75 | # 토지 텍스트 파일 읽어 들이기 76 | fp = codecs.open("BEXX0003.txt", "r", encoding="utf-16") 77 | soup = BeautifulSoup(fp, "html.parser") 78 | body = soup.select_one("body > text") 79 | text = body.getText() 80 | text = text.replace("…", "") # 현재 koNLPy가 …을 구두점으로 잡지 못하는 문제 임시 해결 81 | # 형태소 분석 82 | twitter = Twitter() 83 | malist = twitter.pos(text, norm=True) 84 | words = [] 85 | for word in malist: 86 | # 구두점 등은 대상에서 제외(단 마침표는 포함) 87 | if not word[1] in ["Punctuation"]: 88 | words.append(word[0]) 89 | if word[0] == ".": 90 | words.append(word[0]) 91 | # 딕셔너리 생성 92 | dic = make_dic(words) 93 | json.dump(dic, open(dict_file,"w", encoding="utf-8")) 94 | else: 95 | dic = json.load(open(dict_file,"r")) 96 | # 문장 만들기 --- (※6) 97 | 98 | 99 | 100 | for i in range(3): 101 | s = make_sentence(dic) 102 | print(s) 103 | print("---") -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/mcbot_chat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 20:02:25 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | import codecs 10 | from bs4 import BeautifulSoup 11 | from konlpy.tag import Twitter 12 | import urllib.request 13 | import os, re, json, random 14 | 15 | # 문장 읽어 들이기 --- (※4) 16 | dict_file = "/home/ryan/nlp_ryan/Chatbot/Slack_Bot/markov-toji.json" 17 | dic = json.load(open(dict_file,"r")) 18 | 19 | # 문장 만들기 --- (※3) 20 | def make_sentence(dic): 21 | ret = [] 22 | if not "@" in dic: return "no dic" 23 | top = dic["@"] 24 | w1 = word_choice(top) 25 | w2 = word_choice(top[w1]) 26 | ret.append(w1) 27 | ret.append(w2) 28 | while True: 29 | w3 = word_choice(dic[w1][w2]) 30 | ret.append(w3) 31 | if w3 == ".": break 32 | w1, w2 = w2, w3 33 | ret = "".join(ret) 34 | # 띄어쓰기 35 | params = urllib.parse.urlencode({ 36 | "_callback": "", 37 | "q": ret 38 | }) 39 | # 네이버 맞춤법 검사기를 사용합니다. 40 | data = urllib.request.urlopen("https://m.search.naver.com/p/csearch/dcontent/spellchecker.nhn?" 
+ params) 41 | data = data.read().decode("utf-8")[1:-2] 42 | data = json.loads(data) 43 | data = data["message"]["result"]["html"] 44 | #data = soup = BeautifulSoup(data, "html.parser").getText() 45 | data = BeautifulSoup(data, "html.parser").getText() 46 | 47 | # 리턴 48 | return data 49 | 50 | def word_choice(sel): 51 | keys = sel.keys() 52 | return random.choice(list(keys)) -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/modubot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 15:35:47 2017 5 | 6 | modu-deepnlp 7 | modubot 8 | 9 | http://www.usefulparadigm.com/2016/04/06/creating-a-slack-bot-with-aws-lambda-and-api-gateway/ 10 | https://www.fullstackpython.com/blog/build-first-slack-bot-python.html 11 | 12 | @author: ryan 13 | https://hooks.slack.com/services/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 14 | 15 | """ 16 | 17 | import sys 18 | sys.path.append('/home/ryan/nlp_ryan/Chatbot/Slack_Bot') 19 | from mcbot_chat import make_sentence 20 | import os, re, json, random 21 | 22 | dict_file = "/home/ryan/nlp_ryan/Chatbot/Slack_Bot/markov-toji.json" 23 | dic = json.load(open(dict_file,"r")) 24 | 25 | import os 26 | import time 27 | from slackclient import SlackClient 28 | import random 29 | 30 | #Bot ID & Token 31 | #slack_client.api_call("api.test") 32 | BOT_NAME = 'modubot' 33 | BOT_ID = 'U5Z492W0J' 34 | slack_token = 'your token' 35 | 36 | #export BOT_NAME='modubot' 37 | #export slack_token='xoxb-203145098018-UFRw9AIzGDiZcuc4aSF1kFdl' 38 | 39 | # instantiate Slack & Twilio clients 40 | slack_client = SlackClient(slack_token) 41 | 42 | #Check if everything is alright 43 | is_ok = slack_client.api_call("users.list").get('ok') 44 | 45 | # find the id of our slack bot 46 | if(is_ok): 47 | for user in slack_client.api_call("users.list").get('members'): 48 | if user.get('name') == BOT_ID: 49 | print(user.get('id')) 50 | 51 | # how the bot is mentioned on slack 52 | def get_mention(user): 53 | return '<@{user}>'.format(user=user) 54 | 55 | slack_mention = get_mention(BOT_ID) 56 | 57 | #Start Chatbot 58 | SOCKET_DELAY = 1 59 | 60 | def is_private(event): 61 | """Checks if private slack channel""" 62 | return event.get('channel').startswith('D') 63 | 64 | def is_for_me(event): 65 | #chekc if not my own event 66 | type = event.get('type') 67 | if type and type == 'message' and not(event.get('user')==BOT_ID): 68 | #in case it is a private message 69 | if is_private(event): 70 | return True 71 | #in case it is not a private 72 | text = event.get('text') 73 | channel = event.get('channel') 74 | if slack_mention in text.strip().split(): 75 | return True 76 | 77 | def post_message(message, channel): 78 | slack_client.api_call('chat.postMessage', channel=channel, 79 | text=message, as_user=True) 80 | 81 | import nltk 82 | 83 | def is_hi(message): 84 | tokens = [word.lower() for word in message.strip().split()] 85 | return any(g in tokens 86 | for g in ['안녕', '안녕하세요', '테스트']) 87 | 88 | def is_bye(message): 89 | tokens = [word.lower() for word in message.strip().split()] 90 | return any(g in tokens 91 | for g in ['bye', 'goodbye', 'revoir', 'adios', 'later', 'cya']) 92 | 93 | def say_hi(user_mention): 94 | """Say Hi to a user by formatting their mention""" 95 | response_template = random.choice([make_sentence(dic)]) 96 | return response_template.format(mention=user_mention) 97 | 98 | def say_bye(user_mention): 99 | """Say Goodbye 
to a user""" 100 | response_template = random.choice(['see you later, alligator...', 101 | 'adios amigo', 102 | 'Bye {mention}!', 103 | 'Au revoir!']) 104 | return response_template.format(mention=user_mention) 105 | 106 | 107 | def handle_message(message, user, channel): 108 | if is_hi(message): 109 | user_mention = get_mention(user) 110 | post_message(message=say_hi(user_mention), channel=channel) 111 | elif is_bye(message): 112 | user_mention = get_mention(user) 113 | post_message(message=say_bye(user_mention), channel=channel) 114 | 115 | def run(): 116 | if slack_client.rtm_connect(): 117 | print('[.] modubot is ON...') 118 | while True: 119 | event_list = slack_client.rtm_read() 120 | if len(event_list) > 0: 121 | for event in event_list: 122 | print(event) 123 | if is_for_me(event): 124 | handle_message(message=event.get('text'), user=event.get('user'), channel=event.get('channel')) 125 | time.sleep(SOCKET_DELAY) 126 | else: 127 | print('[!] Connection to Slack failed.') 128 | 129 | if __name__=='__main__': 130 | run() 131 | -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/print_bot_id.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 25 15:35:47 2017 5 | 6 | modu-deepnlp 7 | modubot 8 | @author: ryan 9 | https://hooks.slack.com/services/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 10 | 11 | """ 12 | 13 | import os 14 | from slackclient import SlackClient 15 | 16 | token = 'your token' 17 | slack_client = SlackClient(token) 18 | #slack_client = SlackClient(os.environ.get('SLACK_BOT_TOKEN')) 19 | print(slack_client.api_call("api.test")) 20 | print(slack_client.api_call("api.test")) 21 | 22 | if __name__ == "__main__": 23 | api_call = slack_client.api_call("users.list") 24 | if api_call.get('ok'): 25 | # retrieve all users so we can find our bot 26 | users = api_call.get('members') 27 | for user in users: 28 | if 'name' in user and user.get('name') == BOT_NAME: 29 | print("Bot ID for '" + user['name'] + "' is " + user.get('id')) 30 | else: 31 | print("could not find bot user with the name " + BOT_NAME) 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /Chatbot/Slack_Bot/toji.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/toji.model -------------------------------------------------------------------------------- /Chatbot/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Aug 10 16:42:07 2017 5 | 6 | @author: naver 7 | """ 8 | 9 | -------------------------------------------------------------------------------- /Chatbot/__pycache__/helpers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/__pycache__/helpers.cpython-36.pyc -------------------------------------------------------------------------------- /Chatbot/helpers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Aug 10 16:38:16 2017 5 | 6 | @author: naver 7 | """ 8 | 9 | 
import numpy as np 10 | 11 | def batch(inputs, max_sequence_length=None): 12 | """ 13 | Args: 14 | inputs: 15 | list of sentences (integer lists) 16 | max_sequence_length: 17 | integer specifying how large should `max_time` dimension be. 18 | If None, maximum sequence length would be used 19 | 20 | Outputs: 21 | inputs_time_major: 22 | input sentences transformed into time-major matrix 23 | (shape [max_time, batch_size]) padded with 0s 24 | sequence_lengths: 25 | batch-sized list of integers specifying amount of active 26 | time steps in each input sequence 27 | """ 28 | 29 | sequence_lengths = [len(seq) for seq in inputs] 30 | batch_size = len(inputs) 31 | 32 | if max_sequence_length is None: 33 | max_sequence_length = max(sequence_lengths) 34 | 35 | inputs_batch_major = np.zeros(shape=[batch_size, max_sequence_length], dtype=np.int32) # == PAD 36 | 37 | for i, seq in enumerate(inputs): 38 | for j, element in enumerate(seq): 39 | inputs_batch_major[i, j] = element 40 | 41 | # [batch_size, max_time] -> [max_time, batch_size] 42 | inputs_time_major = inputs_batch_major.swapaxes(0, 1) 43 | 44 | return inputs_time_major, sequence_lengths 45 | 46 | 47 | def random_sequences(length_from, length_to, 48 | vocab_lower, vocab_upper, 49 | batch_size): 50 | """ Generates batches of random integer sequences, 51 | sequence length in [length_from, length_to], 52 | vocabulary in [vocab_lower, vocab_upper] 53 | """ 54 | if length_from > length_to: 55 | raise ValueError('length_from > length_to') 56 | 57 | def random_length(): 58 | if length_from == length_to: 59 | return length_from 60 | return np.random.randint(length_from, length_to + 1) 61 | 62 | while True: 63 | yield [ 64 | np.random.randint(low=vocab_lower, 65 | high=vocab_upper, 66 | size=random_length()).tolist() 67 | for _ in range(batch_size) 68 | ] -------------------------------------------------------------------------------- /Chatbot/seq2seq.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "\n", 13 | "def batch(inputs, max_sequence_length=None):\n", 14 | " \"\"\"\n", 15 | " Args:\n", 16 | " inputs:\n", 17 | " list of sentences (integer lists)\n", 18 | " max_sequence_length:\n", 19 | " integer specifying how large should `max_time` dimension be.\n", 20 | " If None, maximum sequence length would be used\n", 21 | " \n", 22 | " Outputs:\n", 23 | " inputs_time_major:\n", 24 | " input sentences transformed into time-major matrix \n", 25 | " (shape [max_time, batch_size]) padded with 0s\n", 26 | " sequence_lengths:\n", 27 | " batch-sized list of integers specifying amount of active \n", 28 | " time steps in each input sequence\n", 29 | " \"\"\"\n", 30 | " \n", 31 | " sequence_lengths = [len(seq) for seq in inputs]\n", 32 | " batch_size = len(inputs)\n", 33 | " \n", 34 | " if max_sequence_length is None:\n", 35 | " max_sequence_length = max(sequence_lengths)\n", 36 | " \n", 37 | " inputs_batch_major = np.zeros(shape=[batch_size, max_sequence_length], dtype=np.int32) # == PAD\n", 38 | " \n", 39 | " for i, seq in enumerate(inputs):\n", 40 | " for j, element in enumerate(seq):\n", 41 | " inputs_batch_major[i, j] = element\n", 42 | "\n", 43 | " # [batch_size, max_time] -> [max_time, batch_size]\n", 44 | " inputs_time_major = inputs_batch_major.swapaxes(0, 1)\n", 45 | "\n", 46 | " return inputs_time_major, 
sequence_lengths\n", 47 | "\n", 48 | "\n", 49 | "def random_sequences(length_from, length_to,\n", 50 | " vocab_lower, vocab_upper,\n", 51 | " batch_size):\n", 52 | " \"\"\" Generates batches of random integer sequences,\n", 53 | " sequence length in [length_from, length_to],\n", 54 | " vocabulary in [vocab_lower, vocab_upper]\n", 55 | " \"\"\"\n", 56 | " if length_from > length_to:\n", 57 | " raise ValueError('length_from > length_to')\n", 58 | "\n", 59 | " def random_length():\n", 60 | " if length_from == length_to:\n", 61 | " return length_from\n", 62 | " return np.random.randint(length_from, length_to + 1)\n", 63 | " \n", 64 | " while True:\n", 65 | " yield [\n", 66 | " np.random.randint(low=vocab_lower,\n", 67 | " high=vocab_upper,\n", 68 | " size=random_length()).tolist()\n", 69 | " for _ in range(batch_size)\n", 70 | " ]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 6, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "x = [[5, 7, 8], [6, 3], [3], [1]]\n", 80 | "\n", 81 | "xt, xlen = batch(x)" 82 | ] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Python 3", 88 | "language": "python", 89 | "name": "python3" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.6.1" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 2 106 | } 107 | -------------------------------------------------------------------------------- /Dataset/dataset.md: -------------------------------------------------------------------------------- 1 | Premade Datasets 2 | 1. http://research.microsoft.com/en-us/um/redmond/projects/mctest/index.html 3 | MCTest is a freely available set of 660 stories and associated questions intended for research on the machine comprehension of text. 4 | 2. http://www.gutenberg.org/wiki/Gutenberg:Offline_Catalogs 5 | Gutenberge has a lot of books 6 | 3. https://catalog.ldc.upenn.edu/LDC2006T13 7 | Web 1T 5-gram Version 1, contributed by Google Inc., contains English word n-grams and their observed frequency counts. 8 | The length of the n-grams ranges from unigrams (single words) to five-grams. 9 | This data is expected to be useful for statistical language modeling, e.g., for machine translation or speech recognition, etc. 10 | 4. http://www.iesl.cs.umass.edu/data 11 | A lot of datasets 12 | 5. http://webdatacommons.org/webtables/ 13 | A subset of the HTML tables on the Web contains relational data which can be useful for various applications. 14 | The Web Data Commons project has extracted two large corpora of relational Web tables from the Common Crawl and offers them for public download. 15 | This page provides an overview of the corpora as well as their use cases. 16 | 6. http://statmt.org/ngrams/ 17 | Unpruend Unpruned 5-gram counts and language models trained on 9 billion web pages -- Large amounts of raw data in many languages 18 | 7. https://en.wikipedia.org/wiki/Wikipedia:Database_download 19 | Wikipedia Database Download 20 | 8. https://aws.amazon.com/ko/datasets/google-books-ngrams/ 21 | A data set containing Google Books n-gram corpora. 22 | 9. https://aws.amazon.com/ko/public-datasets/common-crawl/ 23 | The Common Crawl corpus includes web crawl data collected over 8 years. 
24 | Common Crawl offers the largest, most comprehensive, open repository of web crawl data on the cloud. 25 | 10. http://commoncrawl.org/the-data/tutorials/ 26 | 착한 아이들 ㅋㅋ 27 | 11. https://wikireverse.org/data 28 | The full dataset of 36 million links can be downloaded as a torrent. 29 | The download is a tarball containing 4 tab-delimited files. 30 | The data is 1.1 GB when compressed and 5.4 GB when extracted. 31 | 12. https://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.html 32 | This corpus contains a large metadata-rich collection of fictional conversations extracted from raw movie scripts. 33 | 13. https://www.uow.edu.au/~dlee/corpora.htm 34 | several dozens of english corpus 35 | 14. http://research.google.com/research-outreach.html#/research-outreach/research-datasets 36 | Google Datasets 37 | 15. http://www.cs.cornell.edu/home/llee/data/ 38 | Collection of Cornell Datasets 39 | 16. https://github.com/rkadlec/ubuntu-ranking-dataset-creator 40 | Ubuntu Dialogue Datasets 41 | 17. http://ebiquity.umbc.edu/resource/html/id/351 42 | The UMBC webBase corpus (http://ebiq.org/r/351) is a dataset containing a collection of English paragraphs with over three billion words 43 | processed from the February 2007 crawl from the Stanford WebBase project (http://bit.ly/WebBase). Compressed, it is about 13GB in size. 44 | 45 | 46 | 47 | Movie Subtitles Datasets (BE AWARE OF COPYRIGHTS!!!) 48 | 49 | http://www.opensubtitles.org/en/search 50 | https://subscene.com/ 51 | http://www.moviesubtitles.org/ 52 | http://www.divxsubtitles.net/ 53 | http://www.subs4free.com/ 54 | 55 | https://videoconverter.iskysoft.com/video-tips/download-subtitles.html (15 Best Subtitle Software and Top 10 Subtitle Download Sites) 56 | 57 | 58 | 59 | Q&A Datasets 60 | https://www.researchgate.net/post/What_are_the_datasets_available_for_question_answering_system 61 | https://archive.org/details/stackexchange 62 | https://rajpurkar.github.io/SQuAD-explorer/ 63 | https://www.quora.com/Datasets-How-can-I-get-corpus-of-a-question-answering-website-like-Quora-or-Yahoo-Answers-or-Stack-Overflow-for-analyzing-answer-quality 64 | http://jmcauley.ucsd.edu/data/amazon/qa/ 65 | 66 | 67 | A lot of Datasets 68 | https://www.reddit.com/r/datasets/comments/3bxlg7/i_have_every_publicly_available_reddit_comment/ 69 | https://github.com/caesar0301/awesome-public-datasets#natural-language 70 | 71 | 72 | Miscellaneous 73 | https://github.com/deepmind/rc-data 74 | http://u.cs.biu.ac.il/~koppel/BlogCorpus.htm 75 | http://wiki.dbpedia.org/Downloads2015-10 76 | https://aws.amazon.com/ko/datasets/google-books-ngrams/ -------------------------------------------------------------------------------- /HTML/code_academy.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Ship To It - Company Packing List 5 | 6 | 7 | 8 | 9 | 10 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
<tr><th>Company Name</th><th>Number of Items to Ship</th><th>Next Action</th></tr>
<tr><td>Adam's Greenworks</td><td>14</td><td>Package Items</td></tr>
<tr><td>Davie's Burgers</td><td>2</td><td>Send Invoice</td></tr>
<tr><td>Baker's Bike Shop</td><td>3</td><td>Send Invoice</td></tr>
<tr><td>Miss Sally's Southern</td><td>4</td><td>Ship</td></tr>
<tr><td>Summit Resort Rentals</td><td>4</td><td>Ship</td></tr>
<tr><td>Strike Fitness</td><td>1</td><td>Enter Order</td></tr>
60 | 61 | 62 | -------------------------------------------------------------------------------- /Keras_Basic/Keras_Cheat_Sheet_Python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Keras_Basic/Keras_Cheat_Sheet_Python.pdf -------------------------------------------------------------------------------- /Keras_Basic/Keras_basic_fin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 30 15:20:50 2017 5 | 6 | @author: ryan 7 | 8 | Most of infomation from DataCamp Keras Course 9 | https://www.datacamp.com/community/blog/new-course-deep-learning-in-python-first-keras-2-0-online-course#gs.8RUVmWM 10 | 11 | """ 12 | 13 | # Import necessary modules 14 | #import keras 15 | from keras.layers import Dense 16 | from keras.models import Sequential 17 | from keras.datasets import boston_housing 18 | from keras.wrappers.scikit_learn import KerasRegressor 19 | from sklearn.model_selection import cross_val_score 20 | from sklearn.model_selection import KFold 21 | import numpy as np 22 | 23 | (x_train, y_train), (x_test, y_test) = boston_housing.load_data() 24 | 25 | print(x_train.shape, y_train.shape) #(404, 13) / (404,) 26 | 27 | # Save the number of columns in training set: n_cols 28 | n_cols = x_train.shape[1] 29 | 30 | #Define Model for boston data 31 | 32 | # Set up the model: model 33 | model = Sequential() 34 | model.add(Dense(13, activation='relu', input_shape=(n_cols,), kernel_initializer = 'normal')) 35 | # Add the output layer 36 | model.add(Dense(1, kernel_initializer='normal')) 37 | #Complile model 일반적으로 Adam을 추천 (CS231 강의에서도 잘 모르겠으면 Adam 사용 추천) 38 | model.compile(optimizer='adam', loss='mean_squared_error') 39 | 40 | # Verify that model contains information from compiling 41 | print("Loss function: " + model.loss) 42 | 43 | """ 44 | 모델 학습 / 구조 확인 및 시각화 45 | """ 46 | model.summary() #모델의 구조를 확인 47 | # Fit the model 48 | history = model.fit(x_train, y_train, epochs=100) 49 | # Test the model 50 | '''Predictions''' 51 | # Calculate predictions: predictions 52 | score = model.evaluate(x_test, y_test) 53 | 54 | # list all data in history 55 | print(history.history.keys()) 56 | 57 | #Loss 시각화 58 | import matplotlib.pyplot as plt 59 | 60 | plt.plot(history.history['loss']) 61 | plt.title('model loss') 62 | plt.ylabel('loss') 63 | plt.xlabel('epoch') 64 | plt.legend(['train'], loc='upper left') 65 | plt.show() 66 | 67 | 68 | -------------------------------------------------------------------------------- /Keras_Basic/Keras_classification_basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 30 15:20:50 2017 5 | 6 | @author: ryan 7 | 8 | Most of infomation from DataCamp Keras Course 9 | https://www.datacamp.com/community/blog/new-course-deep-learning-in-python-first-keras-2-0-online-course#gs.8RUVmWM 10 | 11 | """ 12 | 13 | # Import necessary modules 14 | import keras 15 | from keras.layers import Dense 16 | from keras.models import Sequential 17 | from keras.utils import to_categorical 18 | 19 | # Convert the target to categorical: target 20 | target = to_categorical(df.survived) 21 | 22 | model = Sequential() 23 | model.add(Dense(32, activation='relu', input_shape=(n_cols,))) 24 | model.add(Dense(2, activation='softmax')) 25 | # 
-------------------------------------------------------------------------------- /Keras_Basic/Keras_classification_basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 30 15:20:50 2017 5 | 6 | @author: ryan 7 | 8 | Most of this information is from the DataCamp Keras course 9 | https://www.datacamp.com/community/blog/new-course-deep-learning-in-python-first-keras-2-0-online-course#gs.8RUVmWM 10 | 11 | """ 12 | 13 | # Import necessary modules 14 | import keras 15 | from keras.layers import Dense 16 | from keras.models import Sequential 17 | from keras.utils import to_categorical 18 | 19 | # Convert the target to categorical: target 20 | target = to_categorical(df.survived) 21 | 22 | model = Sequential() 23 | model.add(Dense(32, activation='relu', input_shape=(n_cols,))) 24 | model.add(Dense(2, activation='softmax')) 25 | # Compile the model 26 | model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy']) 27 | # Fit the model 28 | model.fit(predictors, target) 29 | 30 | '''Predictions''' 31 | # Calculate predictions: predictions 32 | predictions = model.predict(pred_data) 33 | 34 | # Calculate predicted probability of survival: predicted_prob_true 35 | predicted_prob_true = predictions[:,1] 36 | 37 | # print predicted_prob_true 38 | print(predicted_prob_true) 39 | 40 | ''' 41 | Save and Load 42 | ''' 43 | 44 | from keras.models import load_model 45 | model.save('my_model.h5') 46 | my_model = load_model('my_model.h5') 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /Keras_Basic/Keras_fine_tuning_basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Apr 30 18:27:14 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | 10 | """ 11 | Model Optimization 12 | 13 | 1. loss options 14 | - mean_squared_error 15 | - mean_squared_logarithmic_error 16 | - mean_absolute_error 17 | - mean_absolute_percentage_error 18 | - binary_crossentropy 19 | - categorical_crossentropy 20 | 21 | 2. L1/L2 regularization 22 | 23 | from keras import regularizers 24 | model.add(Dense(50, input_dim=100, activation="sigmoid", kernel_regularizer=regularizers.l2(0.01))) 25 | 26 | 3. Dropout -> at the end, scale by multiplying the weights by the keep probability p 27 | 28 | model.add(Dropout(0.5)) 29 | model.compile(optimizer=SGD(0.5), loss='categorical_crossentropy', metrics=["acc"]) 30 | 31 | 4. Weight initialization 32 | model.add(Dense(100, input_dim=10, activation="sigmoid", kernel_initializer="uniform")) 33 | 34 | 5. Softmax 35 | 36 | model = Sequential() 37 | model.add(Dense(15, input_dim=100, activation='sigmoid', kernel_initializer='glorot_uniform')) 38 | model.add(Dense(10, activation='softmax', kernel_initializer='glorot_uniform')) 39 | model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=["accuracy"]) 40 | 41 | """ 42 | 43 | # Import the SGD optimizer 44 | from keras.optimizers import SGD 45 | 46 | # Create list of learning rates: lr_to_test 47 | lr_to_test = [.000001, 0.01, 1] 48 | 49 | # Loop over learning rates 50 | for lr in lr_to_test: 51 | print('\n\nTesting model with learning rate: %f\n'%lr ) 52 | 53 | # Build new model to test, unaffected by previous models 54 | model = get_new_model() 55 | 56 | # Create SGD optimizer with specified learning rate: my_optimizer 57 | my_optimizer = SGD(lr=lr) 58 | 59 | # Compile the model 60 | model.compile(optimizer = my_optimizer, loss = 'categorical_crossentropy') 61 | 62 | # Fit the model 63 | model.fit(predictors, target) 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | """ 72 | Model validation 73 | 74 | 75 | model.fit(predictors, target, validation_split=0.3) 76 | Early Stopping 77 | stop training when the validation score stops improving (patience) 78 | 79 | Experimentation 80 | - Experiment with different architectures 81 | - More layers 82 | - Fewer layers 83 | - Layers with more nodes 84 | - Layers with fewer nodes 85 | - Creating a great model requires experimentation 86 | 87 | """ 88 | #Validation Set 89 | # Save the number of columns in predictors: n_cols 90 | n_cols = predictors.shape[1] 91 | input_shape = (n_cols,) 92 | 93 | # Specify the model 94 | model = Sequential() 95 | model.add(Dense(100, activation='relu', input_shape = input_shape)) 96 | model.add(Dense(100, activation='relu')) 97 | model.add(Dense(2, activation='softmax')) 98 | 99 | # Compile the model 100 | model.compile(optimizer = 
'adam', loss = 'categorical_crossentropy', metrics=['accuracy']) 101 | 102 | # Fit the model 103 | hist = model.fit(predictors, target, validation_split=0.3) 104 | 105 | """ 106 | #Early Stopping 107 | """ 108 | 109 | # Import EarlyStopping 110 | from keras.callbacks import EarlyStopping 111 | 112 | # Save the number of columns in predictors: n_cols 113 | n_cols = predictors.shape[1] 114 | input_shape = (n_cols,) 115 | 116 | # Specify the model 117 | model = Sequential() 118 | model.add(Dense(100, activation='relu', input_shape = input_shape)) 119 | model.add(Dense(100, activation='relu')) 120 | model.add(Dense(2, activation='softmax')) 121 | 122 | # Compile the model 123 | model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) 124 | 125 | # Define early_stopping_monitor 126 | early_stopping_monitor = EarlyStopping(patience = 2) 127 | 128 | # Fit the model 129 | model.fit(predictors, target, epochs=30, validation_split=0.3, callbacks = [early_stopping_monitor]) 130 | 131 | """ 132 | ##Experimenting with wider networks 133 | 134 | verbose=False / logging output, tell me everything 135 | 136 | """ 137 | # Define early_stopping_monitor 138 | early_stopping_monitor = EarlyStopping(patience=2) 139 | 140 | # Create the new model: model_2 141 | model_2 = Sequential() 142 | 143 | # Add the first and second layers 144 | model_2.add(Dense(100, activation="relu", input_shape=input_shape)) 145 | model_2.add(Dense(100, activation="relu")) 146 | 147 | # Add the output layer 148 | model_2.add(Dense(2, activation="softmax")) 149 | 150 | # Compile model_2 151 | model_2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) 152 | 153 | # Fit model_1 154 | model_1_training = model_1.fit(predictors, target, epochs=15, validation_split=0.2, callbacks=[early_stopping_monitor], verbose=False) 155 | 156 | # Fit model_2 157 | model_2_training = model_2.fit(predictors, target, epochs=15, validation_split=0.2, callbacks=[early_stopping_monitor], verbose=False) 158 | 159 | # Create the plot 160 | plt.plot(model_1_training.history['val_loss'], 'r', model_2_training.history['val_loss'], 'b') 161 | plt.xlabel('Epochs') 162 | plt.ylabel('Validation score') 163 | plt.show() 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /Keras_Basic/Keras_tutorial_text_generation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "'''Example script to generate text from Nietzsche's writings.\n", 12 | "At least 20 epochs are required before the generated text\n", 13 | "starts sounding coherent.\n", 14 | "It is recommended to run this script on GPU, as recurrent\n", 15 | "networks are quite computationally intensive.\n", 16 | "If you try this script on new data, make sure your corpus\n", 17 | "has at least ~100k characters. 
~1M is better.\n", 18 | "'''\n", 19 | "\n", 20 | "from __future__ import print_function\n", 21 | "from keras.models import Sequential\n", 22 | "from keras.layers import Dense, Activation\n", 23 | "from keras.layers import LSTM\n", 24 | "from keras.optimizers import RMSprop\n", 25 | "from keras.utils.data_utils import get_file\n", 26 | "import numpy as np\n", 27 | "import random\n", 28 | "import sys\n", 29 | "\n", 30 | "path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')\n", 31 | "text = open(path).read().lower()\n", 32 | "print('corpus length:', len(text))\n", 33 | "\n", 34 | "chars = sorted(list(set(text)))\n", 35 | "print('total chars:', len(chars))\n", 36 | "char_indices = dict((c, i) for i, c in enumerate(chars))\n", 37 | "indices_char = dict((i, c) for i, c in enumerate(chars))\n", 38 | "\n", 39 | "# cut the text in semi-redundant sequences of maxlen characters\n", 40 | "maxlen = 40\n", 41 | "step = 3\n", 42 | "sentences = []\n", 43 | "next_chars = []\n", 44 | "for i in range(0, len(text) - maxlen, step):\n", 45 | " sentences.append(text[i: i + maxlen])\n", 46 | " next_chars.append(text[i + maxlen])\n", 47 | "print('nb sequences:', len(sentences))\n", 48 | "\n", 49 | "print('Vectorization...')\n", 50 | "X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)\n", 51 | "y = np.zeros((len(sentences), len(chars)), dtype=np.bool)\n", 52 | "for i, sentence in enumerate(sentences):\n", 53 | " for t, char in enumerate(sentence):\n", 54 | " X[i, t, char_indices[char]] = 1\n", 55 | " y[i, char_indices[next_chars[i]]] = 1\n", 56 | "\n", 57 | "\n", 58 | "# build the model: a single LSTM\n", 59 | "print('Build model...')\n", 60 | "model = Sequential()\n", 61 | "model.add(LSTM(128, input_shape=(maxlen, len(chars))))\n", 62 | "model.add(Dense(len(chars)))\n", 63 | "model.add(Activation('softmax'))\n", 64 | "\n", 65 | "optimizer = RMSprop(lr=0.01)\n", 66 | "model.compile(loss='categorical_crossentropy', optimizer=optimizer)\n", 67 | "\n", 68 | "\n", 69 | "def sample(preds, temperature=1.0):\n", 70 | " # helper function to sample an index from a probability array\n", 71 | " preds = np.asarray(preds).astype('float64')\n", 72 | " preds = np.log(preds) / temperature\n", 73 | " exp_preds = np.exp(preds)\n", 74 | " preds = exp_preds / np.sum(exp_preds)\n", 75 | " probas = np.random.multinomial(1, preds, 1)\n", 76 | " return np.argmax(probas)\n", 77 | "\n", 78 | "# train the model, output generated text after each iteration\n", 79 | "for iteration in range(1, 60):\n", 80 | " print()\n", 81 | " print('-' * 50)\n", 82 | " print('Iteration', iteration)\n", 83 | " model.fit(X, y,\n", 84 | " batch_size=128,\n", 85 | " epochs=1)\n", 86 | "\n", 87 | " start_index = random.randint(0, len(text) - maxlen - 1)\n", 88 | "\n", 89 | " for diversity in [0.2, 0.5, 1.0, 1.2]:\n", 90 | " print()\n", 91 | " print('----- diversity:', diversity)\n", 92 | "\n", 93 | " generated = ''\n", 94 | " sentence = text[start_index: start_index + maxlen]\n", 95 | " generated += sentence\n", 96 | " print('----- Generating with seed: \"' + sentence + '\"')\n", 97 | " sys.stdout.write(generated)\n", 98 | "\n", 99 | " for i in range(400):\n", 100 | " x = np.zeros((1, maxlen, len(chars)))\n", 101 | " for t, char in enumerate(sentence):\n", 102 | " x[0, t, char_indices[char]] = 1.\n", 103 | "\n", 104 | " preds = model.predict(x, verbose=0)[0]\n", 105 | " next_index = sample(preds, diversity)\n", 106 | " next_char = indices_char[next_index]\n", 107 | "\n", 108 | " generated += next_char\n", 
109 | " sentence = sentence[1:] + next_char\n", 110 | "\n", 111 | " sys.stdout.write(next_char)\n", 112 | " sys.stdout.flush()\n", 113 | " print()" 114 | ] 115 | } 116 | ], 117 | "metadata": { 118 | "anaconda-cloud": {}, 119 | "kernelspec": { 120 | "display_name": "Python [conda root]", 121 | "language": "python", 122 | "name": "conda-root-py" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.5.2" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 1 139 | } 140 | -------------------------------------------------------------------------------- /Machine_Comprehension/Attention_Keras/.Rhistory: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/.Rhistory -------------------------------------------------------------------------------- /Machine_Comprehension/Attention_Keras/CNNQA_weights.h5.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/CNNQA_weights.h5.7z -------------------------------------------------------------------------------- /Machine_Comprehension/Attention_Keras/KerasQA.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/KerasQA.ods -------------------------------------------------------------------------------- /Machine_Comprehension/Attention_Keras/embedding_data.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/embedding_data.h5 -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/DynamicMemoryNetwork.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/DynamicMemoryNetwork.pdf -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/bAbi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/bAbi.pdf -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/dataset/babi_tasks_1-20_v1-2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/dataset/babi_tasks_1-20_v1-2.tar.gz -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/image/algo_process1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/algo_process1.png -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/image/algo_process2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/algo_process2.png -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/image/algo_process3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/algo_process3.png -------------------------------------------------------------------------------- /Machine_Comprehension/DMN_QA/image/babi_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/babi_dataset.png -------------------------------------------------------------------------------- /Machine_Comprehension/Readme_MC.md: -------------------------------------------------------------------------------- 1 | # Machine Comprehension 2 | 3 | *Feedback: sungjin7127@gmail.com* 4 | 5 | ## Attention Keras 6 | * [Teaching Machines to Read and Comprehend (Paper) - DeepMind](https://arxiv.org/abs/1506.03340) 7 | * [Code to Generate](https://github.com/deepmind/rc-data) 8 | * [DeepMind Q&A Dataset (CNN, Daily Mail)](http://cs.nyu.edu/~kcho/DMQA/) 9 | * [by Keras](https://github.com/dandxy89/DeepLearning_MachineLearning/tree/master/Keras/Attention) -------------------------------------------------------------------------------- /Math_Stat/.Rhistory: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Math_Stat/.Rhistory -------------------------------------------------------------------------------- /Math_Stat/.ipynb_checkpoints/ML_Basic_Siraj Raval-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Intro_to_Math_of_Intelligence" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Starting gradient descent at b = 0, m = 0, error = 5565.107834483211\n", 22 | "Running...\n", 23 | "After 10000 iterations b = 0.6078985997054931, m = 1.4675440436333027, error = 112.31533427075733\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "#The optimal values of m and b can be actually calculated with way less effort than doing a linear regression. 
\n", 29 | "#this is just to demonstrate gradient descent\n", 30 | "\n", 31 | "from numpy import *\n", 32 | "\n", 33 | "# y = mx + b\n", 34 | "# m is slope, b is y-intercept\n", 35 | "def compute_error_for_line_given_points(b, m, points):\n", 36 | " totalError = 0\n", 37 | " for i in range(0, len(points)):\n", 38 | " x = points[i, 0]\n", 39 | " y = points[i, 1]\n", 40 | " totalError += (y - (m * x + b)) ** 2\n", 41 | " return totalError / float(len(points))\n", 42 | "\n", 43 | "def step_gradient(b_current, m_current, points, learningRate):\n", 44 | " b_gradient = 0\n", 45 | " m_gradient = 0\n", 46 | " N = float(len(points))\n", 47 | " for i in range(0, len(points)):\n", 48 | " x = points[i, 0]\n", 49 | " y = points[i, 1]\n", 50 | " b_gradient += -(2/N) * (y - ((m_current * x) + b_current))\n", 51 | " m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))\n", 52 | " new_b = b_current - (learningRate * b_gradient)\n", 53 | " new_m = m_current - (learningRate * m_gradient)\n", 54 | " return [new_b, new_m]\n", 55 | "\n", 56 | "def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):\n", 57 | " b = starting_b\n", 58 | " m = starting_m\n", 59 | " for i in range(num_iterations):\n", 60 | " b, m = step_gradient(b, m, array(points), learning_rate)\n", 61 | " return [b, m]\n", 62 | "\n", 63 | "def run():\n", 64 | " points = genfromtxt(\"data.csv\", delimiter=\",\")\n", 65 | " learning_rate = 0.0001\n", 66 | " initial_b = 0 # initial y-intercept guess\n", 67 | " initial_m = 0 # initial slope guess\n", 68 | " num_iterations = 10000\n", 69 | " print(\"Starting gradient descent at b = {0}, m = {1}, error = {2}\".format(initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))\n", 70 | " print(\"Running...\")\n", 71 | " [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)\n", 72 | " print(\"After {0} iterations b = {1}, m = {2}, error = {3}\".format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))\n", 73 | "\n", 74 | "if __name__ == '__main__':\n", 75 | " run()" 76 | ] 77 | } 78 | ], 79 | "metadata": { 80 | "anaconda-cloud": {}, 81 | "kernelspec": { 82 | "display_name": "Python [conda root]", 83 | "language": "python", 84 | "name": "conda-root-py" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.5.2" 97 | } 98 | }, 99 | "nbformat": 4, 100 | "nbformat_minor": 1 101 | } 102 | -------------------------------------------------------------------------------- /Math_Stat/ML_Basic_Siraj Raval.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Intro_to_Math_of_Intelligence" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": { 14 | "collapsed": false, 15 | "scrolled": true 16 | }, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "Starting gradient descent at b = 0, m = 0, error = 5565.107834483211\n", 23 | "Running...\n", 24 | "After 10000 iterations b = 0.6078985997054931, m = 1.4675440436333027, error = 112.31533427075733\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "#The optimal values of m and b can be actually 
calculated with way less effort than doing a linear regression. \n", 30 | "#this is just to demonstrate gradient descent\n", 31 | "\n", 32 | "from numpy import *\n", 33 | "\n", 34 | "# y = mx + b\n", 35 | "# m is slope, b is y-intercept\n", 36 | "def compute_error_for_line_given_points(b, m, points):\n", 37 | " totalError = 0\n", 38 | " for i in range(0, len(points)):\n", 39 | " x = points[i, 0]\n", 40 | " y = points[i, 1]\n", 41 | " totalError += (y - (m * x + b)) ** 2\n", 42 | " return totalError / float(len(points))\n", 43 | "\n", 44 | "def step_gradient(b_current, m_current, points, learningRate):\n", 45 | " b_gradient = 0\n", 46 | " m_gradient = 0\n", 47 | " N = float(len(points))\n", 48 | " for i in range(0, len(points)):\n", 49 | " x = points[i, 0]\n", 50 | " y = points[i, 1]\n", 51 | " b_gradient += -(2/N) * (y - ((m_current * x) + b_current))\n", 52 | " m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))\n", 53 | " new_b = b_current - (learningRate * b_gradient)\n", 54 | " new_m = m_current - (learningRate * m_gradient)\n", 55 | " return [new_b, new_m]\n", 56 | "\n", 57 | "def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):\n", 58 | " b = starting_b\n", 59 | " m = starting_m\n", 60 | " for i in range(num_iterations):\n", 61 | " b, m = step_gradient(b, m, array(points), learning_rate)\n", 62 | " return [b, m]\n", 63 | "\n", 64 | "def run():\n", 65 | " points = genfromtxt(\"data.csv\", delimiter=\",\")\n", 66 | " learning_rate = 0.0001\n", 67 | " initial_b = 0 # initial y-intercept guess\n", 68 | " initial_m = 0 # initial slope guess\n", 69 | " num_iterations = 10000\n", 70 | " print(\"Starting gradient descent at b = {0}, m = {1}, error = {2}\".format(initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))\n", 71 | " print(\"Running...\")\n", 72 | " [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)\n", 73 | " print(\"After {0} iterations b = {1}, m = {2}, error = {3}\".format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))\n", 74 | "\n", 75 | "if __name__ == '__main__':\n", 76 | " run()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [] 87 | } 88 | ], 89 | "metadata": { 90 | "anaconda-cloud": {}, 91 | "kernelspec": { 92 | "display_name": "Python [conda root]", 93 | "language": "python", 94 | "name": "conda-root-py" 95 | }, 96 | "language_info": { 97 | "codemirror_mode": { 98 | "name": "ipython", 99 | "version": 3 100 | }, 101 | "file_extension": ".py", 102 | "mimetype": "text/x-python", 103 | "name": "python", 104 | "nbconvert_exporter": "python", 105 | "pygments_lexer": "ipython3", 106 | "version": "3.5.2" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 1 111 | } 112 | -------------------------------------------------------------------------------- /Math_Stat/Readme.md: -------------------------------------------------------------------------------- 1 | # Math&Stat Basic 2 | 3 | # *To-Do* 4 | 5 | ## Math & Stat 6 | 7 | * [Fundamentals of Engineering Exam Review](https://www.coursera.org/learn/fe-exam/home/welcome) - Week2 & Week3 (Due July 2017) 8 | 9 | * [Data Science Math Skills](https://www.coursera.org/learn/datasciencemathskills/home/welcome) - (Due August 2017) 10 | 11 | * [Bayesian Statistics: From Concept to Data Analysis](https://www.coursera.org/learn/bayesian-statistics/home/welcome) - (Due 
2017) 12 | 13 | ## Python Skills for Data (One Course per week) 14 | 15 | * Introduction to Data Visualization with Python 16 | * pandas Foundation 17 | * Maniplulating DataFrames with pandas 18 | * Merging DataFrames with pandas 19 | * Statistical Thinking in Python 1 & 2 20 | * Introduction to Databases in Python 21 | * Supervised & Unsupervised Learning in Python 22 | * Intermediate Python for Data Science 23 | 24 | 25 | 26 | # *참고자료* 27 | 28 | ## Data Preprocessing 29 | - [Chris ALBon: Python/R](https://ch/home/ryan/nlp_ryan/Readme.mdrisalbon.com/) 30 | -------------------------------------------------------------------------------- /Math_Stat/data.csv: -------------------------------------------------------------------------------- 1 | 32.502345269453031,31.70700584656992 2 | 53.426804033275019,68.77759598163891 3 | 61.530358025636438,62.562382297945803 4 | 47.475639634786098,71.546632233567777 5 | 59.813207869512318,87.230925133687393 6 | 55.142188413943821,78.211518270799232 7 | 52.211796692214001,79.64197304980874 8 | 39.299566694317065,59.171489321869508 9 | 48.10504169176825,75.331242297063056 10 | 52.550014442733818,71.300879886850353 11 | 45.419730144973755,55.165677145959123 12 | 54.351634881228918,82.478846757497919 13 | 44.164049496773352,62.008923245725825 14 | 58.16847071685779,75.392870425994957 15 | 56.727208057096611,81.43619215887864 16 | 48.955888566093719,60.723602440673965 17 | 44.687196231480904,82.892503731453715 18 | 60.297326851333466,97.379896862166078 19 | 45.618643772955828,48.847153317355072 20 | 38.816817537445637,56.877213186268506 21 | 66.189816606752601,83.878564664602763 22 | 65.41605174513407,118.59121730252249 23 | 47.48120860786787,57.251819462268969 24 | 41.57564261748702,51.391744079832307 25 | 51.84518690563943,75.380651665312357 26 | 59.370822011089523,74.765564032151374 27 | 57.31000343834809,95.455052922574737 28 | 63.615561251453308,95.229366017555307 29 | 46.737619407976972,79.052406169565586 30 | 50.556760148547767,83.432071421323712 31 | 52.223996085553047,63.358790317497878 32 | 35.567830047746632,41.412885303700563 33 | 42.436476944055642,76.617341280074044 34 | 58.16454011019286,96.769566426108199 35 | 57.504447615341789,74.084130116602523 36 | 45.440530725319981,66.588144414228594 37 | 61.89622268029126,77.768482417793024 38 | 33.093831736163963,50.719588912312084 39 | 36.436009511386871,62.124570818071781 40 | 37.675654860850742,60.810246649902211 41 | 44.555608383275356,52.682983366387781 42 | 43.318282631865721,58.569824717692867 43 | 50.073145632289034,82.905981485070512 44 | 43.870612645218372,61.424709804339123 45 | 62.997480747553091,115.24415280079529 46 | 32.669043763467187,45.570588823376085 47 | 40.166899008703702,54.084054796223612 48 | 53.575077531673656,87.994452758110413 49 | 33.864214971778239,52.725494375900425 50 | 64.707138666121296,93.576118692658241 51 | 38.119824026822805,80.166275447370964 52 | 44.502538064645101,65.101711570560326 53 | 40.599538384552318,65.562301260400375 54 | 41.720676356341293,65.280886920822823 55 | 51.088634678336796,73.434641546324301 56 | 55.078095904923202,71.13972785861894 57 | 41.377726534895203,79.102829683549857 58 | 62.494697427269791,86.520538440347153 59 | 49.203887540826003,84.742697807826218 60 | 41.102685187349664,59.358850248624933 61 | 41.182016105169822,61.684037524833627 62 | 50.186389494880601,69.847604158249183 63 | 52.378446219236217,86.098291205774103 64 | 50.135485486286122,59.108839267699643 65 | 33.644706006191782,69.89968164362763 66 | 
39.557901222906828,44.862490711164398 67 | 56.130388816875467,85.498067778840223 68 | 57.362052133238237,95.536686846467219 69 | 60.269214393997906,70.251934419771587 70 | 35.678093889410732,52.721734964774988 71 | 31.588116998132829,50.392670135079896 72 | 53.66093226167304,63.642398775657753 73 | 46.682228649471917,72.247251068662365 74 | 43.107820219102464,57.812512976181402 75 | 70.34607561504933,104.25710158543822 76 | 44.492855880854073,86.642020318822006 77 | 57.50453330326841,91.486778000110135 78 | 36.930076609191808,55.231660886212836 79 | 55.805733357942742,79.550436678507609 80 | 38.954769073377065,44.847124242467601 81 | 56.901214702247074,80.207523139682763 82 | 56.868900661384046,83.14274979204346 83 | 34.33312470421609,55.723489260543914 84 | 59.04974121466681,77.634182511677864 85 | 57.788223993230673,99.051414841748269 86 | 54.282328705967409,79.120646274680027 87 | 51.088719898979143,69.588897851118475 88 | 50.282836348230731,69.510503311494389 89 | 44.211741752090113,73.687564318317285 90 | 38.005488008060688,61.366904537240131 91 | 32.940479942618296,67.170655768995118 92 | 53.691639571070056,85.668203145001542 93 | 68.76573426962166,114.85387123391394 94 | 46.230966498310252,90.123572069967423 95 | 68.319360818255362,97.919821035242848 96 | 50.030174340312143,81.536990783015028 97 | 49.239765342753763,72.111832469615663 98 | 50.039575939875988,85.232007342325673 99 | 48.149858891028863,66.224957888054632 100 | 25.128484647772304,53.454394214850524 101 | -------------------------------------------------------------------------------- /Natural Language Generation/lstm_keras_generation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Jun 26 21:23:43 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | '''Example script to generate text from Nietzsche's writings. 10 | At least 20 epochs are required before the generated text 11 | starts sounding coherent. 12 | It is recommended to run this script on GPU, as recurrent 13 | networks are quite computationally intensive. 14 | If you try this script on new data, make sure your corpus 15 | has at least ~100k characters. ~1M is better. 
16 | ''' 17 | 18 | from __future__ import print_function 19 | from keras.models import Sequential 20 | from keras.layers import Dense, Activation 21 | from keras.layers import LSTM 22 | from keras.optimizers import RMSprop 23 | from keras.utils.data_utils import get_file 24 | import numpy as np 25 | import random 26 | import sys 27 | 28 | path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt') 29 | text = open(path).read().lower() 30 | print('corpus length:', len(text)) 31 | 32 | chars = sorted(list(set(text))) 33 | print('total chars:', len(chars)) 34 | char_indices = dict((c, i) for i, c in enumerate(chars)) 35 | indices_char = dict((i, c) for i, c in enumerate(chars)) 36 | 37 | # cut the text in semi-redundant sequences of maxlen characters 38 | maxlen = 40 39 | step = 3 40 | sentences = [] 41 | next_chars = [] 42 | for i in range(0, len(text) - maxlen, step): 43 | sentences.append(text[i: i + maxlen]) 44 | next_chars.append(text[i + maxlen]) 45 | print('nb sequences:', len(sentences)) 46 | 47 | print('Vectorization...') 48 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 49 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 50 | for i, sentence in enumerate(sentences): 51 | for t, char in enumerate(sentence): 52 | X[i, t, char_indices[char]] = 1 53 | y[i, char_indices[next_chars[i]]] = 1 54 | 55 | 56 | # build the model: a single LSTM 57 | print('Build model...') 58 | model = Sequential() 59 | model.add(LSTM(128, input_shape=(maxlen, len(chars)))) 60 | model.add(Dense(len(chars))) 61 | model.add(Activation('softmax')) 62 | 63 | optimizer = RMSprop(lr=0.01) 64 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 65 | 66 | 67 | def sample(preds, temperature=1.0): 68 | # helper function to sample an index from a probability array 69 | preds = np.asarray(preds).astype('float64') 70 | preds = np.log(preds) / temperature 71 | exp_preds = np.exp(preds) 72 | preds = exp_preds / np.sum(exp_preds) 73 | probas = np.random.multinomial(1, preds, 1) 74 | return np.argmax(probas) 75 | 76 | # train the model, output generated text after each iteration 77 | for iteration in range(1, 60): 78 | print() 79 | print('-' * 50) 80 | print('Iteration', iteration) 81 | model.fit(X, y, 82 | batch_size=128, 83 | epochs=1) 84 | 85 | start_index = random.randint(0, len(text) - maxlen - 1) 86 | 87 | for diversity in [0.2, 0.5, 1.0, 1.2]: 88 | print() 89 | print('----- diversity:', diversity) 90 | 91 | generated = '' 92 | sentence = text[start_index: start_index + maxlen] 93 | generated += sentence 94 | print('----- Generating with seed: "' + sentence + '"') 95 | sys.stdout.write(generated) 96 | 97 | for i in range(400): 98 | x = np.zeros((1, maxlen, len(chars))) 99 | for t, char in enumerate(sentence): 100 | x[0, t, char_indices[char]] = 1. 101 | 102 | preds = model.predict(x, verbose=0)[0] 103 | next_index = sample(preds, diversity) 104 | next_char = indices_char[next_index] 105 | 106 | generated += next_char 107 | sentence = sentence[1:] + next_char 108 | 109 | sys.stdout.write(next_char) 110 | sys.stdout.flush() 111 | print() -------------------------------------------------------------------------------- /Python/.ipynb_checkpoints/Python_Review-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 7. 
함수 이해하기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "def func(pa1, pa2):\n", 19 | " \"\"\"함수란 이런것이다.\n", 20 | " \n", 21 | " \"\"\"\n", 22 | " pa1, pa2 = pa2, pa1\n", 23 | " \n", 24 | " return pa1, pa2" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": { 31 | "collapsed": false, 32 | "scrolled": true 33 | }, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "Help on function func in module __main__:\n", 40 | "\n", 41 | "func(pa1, pa2)\n", 42 | " 함수란 이런것이다.\n", 43 | "\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "help(func)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "source": [ 57 | "# 수정범위\n", 58 | "\n", 59 | "P6. 일급객체\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [ 69 | { 70 | "name": "stdout", 71 | "output_type": "stream", 72 | "text": [ 73 | "{'count': 0}\n", 74 | "call count 1\n", 75 | "20\n", 76 | "call count 2\n", 77 | "22\n", 78 | "{'count': 2}\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "def add(x,y):\n", 84 | " add.count += 1\n", 85 | " print(\"call count\" , add.count)\n", 86 | " return x+y\n", 87 | "\n", 88 | "add.count = 0\n", 89 | "print(add.__dict__)\n", 90 | "print(add(10,10))\n", 91 | "print(add(11,11))\n", 92 | "print(add.__dict__)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 7, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "\n", 107 | "\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "def add(x,y):\n", 113 | " return x+y\n", 114 | "\n", 115 | "print(globals()[\"add\"])\n", 116 | "print(add)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 8, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "10\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "def func(func, x, y):\n", 136 | " return func(x, y)\n", 137 | "\n", 138 | "print(func(add,5,5))" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 10, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "dahl\n", 153 | "dahl\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "#함수에서 클레스 접근 예시\n", 159 | "class A:\n", 160 | " name = \"dahl\"\n", 161 | " \n", 162 | "def getName():\n", 163 | " return A.name\n", 164 | "\n", 165 | "print(getName())\n", 166 | "\n", 167 | "#함수에서 인스턴스 접근 예시\n", 168 | "#instance = 변수 in class\n", 169 | "class Person:\n", 170 | " def __init__(self,name):\n", 171 | " self.name = name\n", 172 | " \n", 173 | "def func(obj):\n", 174 | " return obj.name\n", 175 | "\n", 176 | "p = Person(\"dahl\")\n", 177 | "print(func(p))" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 13, 183 | "metadata": { 184 | "collapsed": false 185 | }, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "10\n", 192 | "20\n" 193 | ] 194 | } 195 | ], 196 | "source": [ 197 | "#익명함수\n", 198 | "fn = lambda x : x\n", 199 | "print(fn(10))\n", 200 | "\n", 201 | "#익명 함수도 객체임\n", 202 | "\n", 
203 | "#익명 함수에서 함수 적용\n", 204 | "fn1 = lambda x : add(x,x)\n", 205 | "print(fn1(10))\n", 206 | "\n", 207 | "#익명함수 내의 파라메터 초기값 처리\n", 208 | "x = 20\n", 209 | "lam = lambda x=x : list(x+n for n in range(3))" 210 | ] 211 | } 212 | ], 213 | "metadata": { 214 | "anaconda-cloud": {}, 215 | "kernelspec": { 216 | "display_name": "Python [conda root]", 217 | "language": "python", 218 | "name": "conda-root-py" 219 | }, 220 | "language_info": { 221 | "codemirror_mode": { 222 | "name": "ipython", 223 | "version": 3 224 | }, 225 | "file_extension": ".py", 226 | "mimetype": "text/x-python", 227 | "name": "python", 228 | "nbconvert_exporter": "python", 229 | "pygments_lexer": "ipython3", 230 | "version": "3.5.2" 231 | } 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 1 235 | } 236 | -------------------------------------------------------------------------------- /Python/Cheat_Sheet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 11 19:26:51 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | #-----------------Sklearn-------------------- 10 | #1. Divide train and test data 11 | from sklearn.model_selection import train_test_split 12 | x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42) 13 | 14 | #---------------Keras---------------- 15 | 16 | #Create the plot 17 | import matplotlib.pyplot as plt 18 | plt.plot(model['acc'], 'r') 19 | plt.xlabel('Epochs') 20 | plt.ylabel('acc') 21 | plt.show() 22 | 23 | #Save Model 24 | from keras.models import load_model 25 | model.save('domain_classify.h5') 26 | 27 | #Load Model 28 | my_model = load_model('domain_classify.h5') 29 | 30 | #Use Model (Make sure input as same dim.) 
31 | my_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 32 | my_model.predict_classes(np.array(sent)) 33 | 34 | 35 | -------------------------------------------------------------------------------- /Python/Decorator.py: -------------------------------------------------------------------------------- 1 | # November 20, Unit 44: Decorators 2 | #https://dojang.io/mod/page/view.php?id=1131 3 | 4 | # A decorator is used to add functionality to a function without modifying the function itself 5 | 6 | class Calc: 7 | @staticmethod 8 | def add(a,b): 9 | print(a,b) 10 | 11 | # A decorator that prints the start and end of a function call 12 | def trace(func): 13 | def wrapper(): 14 | print(func.__name__, '함수 시작') 15 | func() 16 | print(func.__name__, '함수 끝') 17 | return wrapper 18 | 19 | @trace 20 | def hello(): 21 | print('hello') 22 | 23 | @trace 24 | def world(): 25 | print('world') 26 | 27 | # trace_hello = trace(hello) # pass the function to decorate into the decorator 28 | # trace_hello() # call the returned wrapper function 29 | # trace_world = trace(world) 30 | # trace_world() 31 | 32 | hello() 33 | world() -------------------------------------------------------------------------------- /Python/Visualization/.ipynb_checkpoints/Bokeh-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /Python/Visualization/Bokeh.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [] 11 | } 12 | ], 13 | "metadata": { 14 | "kernelspec": { 15 | "display_name": "Python 3", 16 | "language": "python", 17 | "name": "python3" 18 | }, 19 | "language_info": { 20 | "codemirror_mode": { 21 | "name": "ipython", 22 | "version": 3 23 | }, 24 | "file_extension": ".py", 25 | "mimetype": "text/x-python", 26 | "name": "python", 27 | "nbconvert_exporter": "python", 28 | "pygments_lexer": "ipython3", 29 | "version": "3.6.1" 30 | } 31 | }, 32 | "nbformat": 4, 33 | "nbformat_minor": 2 34 | } 35 | -------------------------------------------------------------------------------- /Python/attribute.py: -------------------------------------------------------------------------------- 1 | class Person: 2 | def __init__(self): 3 | self.hello = '안녕하세요.' 4 | 5 | def greeting(self): 6 | print(self.hello) 7 | 8 | james = Person() 9 | james.greeting() # 안녕하세요. -------------------------------------------------------------------------------- /Quora_insincere/.gitignore: -------------------------------------------------------------------------------- 1 | /2_NLP_Study 2 | .DS_Store 3 | .ipynb_checkpoints/ 4 | data_in/ 5 | sh/ 6 | input/ 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | 113 | .vscode 114 | .ipynb_checkpoints 115 | 116 | *.voc 117 | checkPoint 118 | *.log 119 | 120 | OLD/ -------------------------------------------------------------------------------- /Quora_insincere/README.md: -------------------------------------------------------------------------------- 1 | Kaggle 2 | 3 | https://www.kaggle.com/c/quora-insincere-questions-classification/data -------------------------------------------------------------------------------- /Tensorflow/.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### JetBrains template 3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 5 | 6 | # User-specific stuff: 7 | .idea/**/workspace.xml 8 | .idea/**/tasks.xml 9 | .idea/dictionaries 10 | 11 | # Sensitive or high-churn files: 12 | .idea/**/dataSources/ 13 | .idea/**/dataSources.ids 14 | .idea/**/dataSources.xml 15 | .idea/**/dataSources.local.xml 16 | .idea/**/sqlDataSources.xml 17 | .idea/**/dynamic.xml 18 | .idea/**/uiDesigner.xml 19 | 20 | # Gradle: 21 | .idea/**/gradle.xml 22 | .idea/**/libraries 23 | 24 | # CMake 25 | cmake-build-debug/ 26 | 27 | # Mongo Explorer plugin: 28 | .idea/**/mongoSettings.xml 29 | 30 | ## File-based project format: 31 | *.iws 32 | 33 | ## Plugin-specific files: 34 | 35 | # IntelliJ 36 | out/ 37 | 38 | # mpeltonen/sbt-idea plugin 39 | .idea_modules/ 40 | 41 | # JIRA plugin 42 | atlassian-ide-plugin.xml 43 | 44 | # Cursive Clojure plugin 45 | .idea/replstate.xml 46 | 47 | # Crashlytics plugin (for Android Studio and IntelliJ) 48 | com_crashlytics_export_strings.xml 49 | crashlytics.properties 50 | crashlytics-build.properties 51 | fabric.properties 52 | ### Python template 53 | # Byte-compiled / optimized / DLL files 54 | __pycache__/ 55 | *.py[cod] 56 | *$py.class 57 | 58 | # C extensions 59 | *.so 60 | 61 | # Distribution / packaging 62 | .Python 63 | build/ 64 | develop-eggs/ 65 | dist/ 66 | downloads/ 67 | eggs/ 68 | .eggs/ 69 | lib/ 70 | lib64/ 71 | parts/ 72 | sdist/ 73 | var/ 74 | wheels/ 75 | *.egg-info/ 76 | .installed.cfg 77 | *.egg 78 | MANIFEST 79 | 80 | # PyInstaller 81 | # Usually these files are written by a python script from a template 82 | # before PyInstaller builds 
the exe, so as to inject date/other infos into it. 83 | *.manifest 84 | *.spec 85 | 86 | # Installer logs 87 | pip-log.txt 88 | pip-delete-this-directory.txt 89 | 90 | # Unit test / coverage reports 91 | htmlcov/ 92 | .tox/ 93 | .coverage 94 | .coverage.* 95 | .cache 96 | nosetests.xml 97 | coverage.xml 98 | *.cover 99 | .hypothesis/ 100 | 101 | # Translations 102 | *.mo 103 | *.pot 104 | 105 | # Django stuff: 106 | *.log 107 | .static_storage/ 108 | .media/ 109 | local_settings.py 110 | 111 | # Flask stuff: 112 | instance/ 113 | .webassets-cache 114 | 115 | # Scrapy stuff: 116 | .scrapy 117 | 118 | # Sphinx documentation 119 | docs/_build/ 120 | 121 | # PyBuilder 122 | target/ 123 | 124 | # Jupyter Notebook 125 | .ipynb_checkpoints 126 | 127 | # pyenv 128 | .python-version 129 | 130 | # celery beat schedule file 131 | celerybeat-schedule 132 | 133 | # SageMath parsed files 134 | *.sage.py 135 | 136 | # Environments 137 | .env 138 | .venv 139 | env/ 140 | venv/ 141 | ENV/ 142 | env.bak/ 143 | venv.bak/ 144 | 145 | # Spyder project settings 146 | .spyderproject 147 | .spyproject 148 | 149 | # Rope project settings 150 | .ropeproject 151 | 152 | # mkdocs documentation 153 | /site 154 | 155 | # mypy 156 | .mypy_cache/ 157 | ### macOS template 158 | # General 159 | .DS_Store 160 | .AppleDouble 161 | .LSOverride 162 | 163 | # Icon must end with two \r 164 | Icon 165 | 166 | # Thumbnails 167 | ._* 168 | 169 | # Files that might appear in the root of a volume 170 | .DocumentRevisions-V100 171 | .fseventsd 172 | .Spotlight-V100 173 | .TemporaryItems 174 | .Trashes 175 | .VolumeIcon.icns 176 | .com.apple.timemachine.donotpresent 177 | 178 | # Directories potentially created on remote AFP share 179 | .AppleDB 180 | .AppleDesktop 181 | Network Trash Folder 182 | Temporary Items 183 | .apdisk 184 | 185 | .idea/ 186 | data_out/* 187 | checkpoint/ 188 | logs/ 189 | OLD/ 190 | practice/ 191 | scala_data_pre/ 192 | target/ 193 | .vscode/ 194 | .ipynb_checkpoints/ 195 | .DS_Store 196 | .DS_Store* 197 | my_test_model/ 198 | result/ 199 | sh/ -------------------------------------------------------------------------------- /Tensorflow/04_word2vec_eager.py: -------------------------------------------------------------------------------- 1 | """ starter code for word2vec skip-gram model with NCE loss 2 | Eager execution 3 | CS 20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Chip Huyen (chiphuyen@cs.stanford.edu) & Akshay Agrawal (akshayka@cs.stanford.edu) 6 | Lecture 04 7 | """ 8 | 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | import tensorflow.contrib.eager as tfe 15 | 16 | import utils 17 | import word2vec_utils 18 | 19 | tfe.enable_eager_execution() 20 | 21 | # Model hyperparameters 22 | VOCAB_SIZE = 50000 23 | BATCH_SIZE = 128 24 | EMBED_SIZE = 128 # dimension of the word embedding vectors 25 | SKIP_WINDOW = 1 # the context window 26 | NUM_SAMPLED = 64 # number of negative examples to sample 27 | LEARNING_RATE = 1.0 28 | NUM_TRAIN_STEPS = 100000 29 | VISUAL_FLD = 'visualization' 30 | SKIP_STEP = 5000 31 | 32 | # Parameters for downloading data 33 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip' 34 | EXPECTED_BYTES = 31344016 35 | 36 | class Word2Vec(object): 37 | def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED): 38 | self.vocab_size = vocab_size 39 | self.num_sampled = num_sampled 40 | self.embed_matrix = tfe.Variable(tf.random_uniform( 41 | [vocab_size, embed_size])) 42 | 
self.nce_weight = tfe.Variable(tf.truncated_normal( 43 | [vocab_size, embed_size], 44 | stddev=1.0 / (embed_size ** 0.5))) 45 | self.nce_bias = tfe.Variable(tf.zeros([vocab_size])) 46 | 47 | def compute_loss(self, center_words, target_words): 48 | """Computes the forward pass of word2vec with the NCE loss.""" 49 | embed = tf.nn.embedding_lookup(self.embed_matrix, center_words) 50 | loss = tf.reduce_mean(tf.nn.nce_loss(weights=self.nce_weight, 51 | biases=self.nce_bias, 52 | labels=target_words, 53 | inputs=embed, 54 | num_sampled=self.num_sampled, 55 | num_classes=self.vocab_size)) 56 | return loss 57 | 58 | 59 | def gen(): 60 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES, 61 | VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW, 62 | VISUAL_FLD) 63 | 64 | def main(): 65 | dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32), 66 | (tf.TensorShape([BATCH_SIZE]), 67 | tf.TensorShape([BATCH_SIZE, 1]))) 68 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE) 69 | model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE) 70 | grad_fn = tfe.implicit_value_and_gradients(model.compute_loss) 71 | total_loss = 0.0 # for average loss in the last SKIP_STEP steps 72 | num_train_steps = 0 73 | while num_train_steps < NUM_TRAIN_STEPS: 74 | for center_words, target_words in tfe.Iterator(dataset): 75 | if num_train_steps >= NUM_TRAIN_STEPS: 76 | break 77 | loss_batch, grads = grad_fn(center_words, target_words) 78 | total_loss += loss_batch 79 | optimizer.apply_gradients(grads) 80 | if (num_train_steps + 1) % SKIP_STEP == 0: 81 | print('Average loss at step {}: {:5.1f}'.format( 82 | num_train_steps, total_loss / SKIP_STEP)) 83 | total_loss = 0.0 84 | num_train_steps += 1 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /Tensorflow/TF_README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning NLP Tutorial from Scratch 2 | 3 | *Feedback: sungjin7127@gmail.com* 4 | 5 | # *To-Do* 6 | 7 | ##Tensorflow Tutorial (1.4 or Later) 8 | 9 | * [Conversation-Tensorflow](https://github.com/DoungjunLee/conversation-tensorflow) 10 | * [Hvass Tensorflow Tutorial](https://github.com/Hvass-Labs/TensorFlow-Tutorials) 11 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/02_lazy_loading.py: -------------------------------------------------------------------------------- 1 | """ Example of lazy vs normal loading 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 02 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | 10 | import tensorflow as tf 11 | 12 | ######################################## 13 | ## NORMAL LOADING ## 14 | ## print out a graph with 1 Add node ## 15 | ######################################## 16 | 17 | x = tf.Variable(10, name='x') 18 | y = tf.Variable(20, name='y') 19 | z = tf.add(x, y) 20 | 21 | with tf.Session() as sess: 22 | sess.run(tf.global_variables_initializer()) 23 | writer = tf.summary.FileWriter('graphs/normal_loading', sess.graph) 24 | for _ in range(10): 25 | sess.run(z) 26 | print(tf.get_default_graph().as_graph_def()) 27 | writer.close() 28 | 29 | ######################################## 30 | ## LAZY LOADING ## 31 | ## print out a graph with 10 Add nodes## 32 | ######################################## 33 | 34 | x = tf.Variable(10, name='x') 35 | y = 
tf.Variable(20, name='y') 36 | 37 | with tf.Session() as sess: 38 | sess.run(tf.global_variables_initializer()) 39 | writer = tf.summary.FileWriter('graphs/lazy_loading', sess.graph) 40 | for _ in range(10): 41 | sess.run(tf.add(x, y)) 42 | print(tf.get_default_graph().as_graph_def()) 43 | writer.close() -------------------------------------------------------------------------------- /Tensorflow/standford_example/02_placeholder.py: -------------------------------------------------------------------------------- 1 | """ Placeholder and feed_dict example 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 02 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | 10 | import tensorflow as tf 11 | 12 | # Example 1: feed_dict with placeholder 13 | 14 | # a is a placeholderfor a vector of 3 elements, type tf.float32 15 | a = tf.placeholder(tf.float32, shape=[3]) 16 | b = tf.constant([5, 5, 5], tf.float32) 17 | 18 | # use the placeholder as you would a constant 19 | c = a + b # short for tf.add(a, b) 20 | 21 | writer = tf.summary.FileWriter('graphs/placeholders', tf.get_default_graph()) 22 | with tf.Session() as sess: 23 | # compute the value of c given the value of a is [1, 2, 3] 24 | print(sess.run(c, {a: [1, 2, 3]})) # [6. 7. 8.] 25 | writer.close() 26 | 27 | 28 | # Example 2: feed_dict with variables 29 | a = tf.add(2, 5) 30 | b = tf.multiply(a, 3) 31 | 32 | with tf.Session() as sess: 33 | print(sess.run(b)) # >> 21 34 | # compute the value of b given the value of a is 15 35 | print(sess.run(b, feed_dict={a: 15})) # >> 45 -------------------------------------------------------------------------------- /Tensorflow/standford_example/02_simple_tf.py: -------------------------------------------------------------------------------- 1 | """ Simple TensorFlow's ops 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | """ 6 | import os 7 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | # Example 1: Simple ways to create log file writer 13 | a = tf.constant(2, name='a') 14 | b = tf.constant(3, name='b') 15 | x = tf.add(a, b, name='add') 16 | writer = tf.summary.FileWriter('./graphs/simple', tf.get_default_graph()) 17 | with tf.Session() as sess: 18 | # writer = tf.summary.FileWriter('./graphs', sess.graph) 19 | print(sess.run(x)) 20 | writer.close() # close the writer when you’re done using it 21 | 22 | # Example 2: The wonderful wizard of div 23 | a = tf.constant([2, 2], name='a') 24 | b = tf.constant([[0, 1], [2, 3]], name='b') 25 | 26 | with tf.Session() as sess: 27 | print(sess.run(tf.div(b, a))) 28 | print(sess.run(tf.divide(b, a))) 29 | print(sess.run(tf.truediv(b, a))) 30 | print(sess.run(tf.floordiv(b, a))) 31 | # print(sess.run(tf.realdiv(b, a))) 32 | print(sess.run(tf.truncatediv(b, a))) 33 | print(sess.run(tf.floor_div(b, a))) 34 | 35 | # Example 3: multiplying tensors 36 | a = tf.constant([10, 20], name='a') 37 | b = tf.constant([2, 3], name='b') 38 | 39 | with tf.Session() as sess: 40 | print(sess.run(tf.multiply(a, b))) 41 | print(sess.run(tf.tensordot(a, b, 1))) 42 | 43 | # Example 4: Python native type 44 | t_0 = 19 45 | x = tf.zeros_like(t_0) # ==> 0 46 | y = tf.ones_like(t_0) # ==> 1 47 | 48 | t_1 = ['apple', 'peach', 'banana'] 49 | x = tf.zeros_like(t_1) # ==> ['' '' ''] 50 | # y = tf.ones_like(t_1) # ==> TypeError: Expected string, got 1 of type 'int' 
instead. 51 | 52 | t_2 = [[True, False, False], 53 | [False, False, True], 54 | [False, True, False]] 55 | x = tf.zeros_like(t_2) # ==> 3x3 tensor, all elements are False 56 | y = tf.ones_like(t_2) # ==> 3x3 tensor, all elements are True 57 | 58 | print(tf.int32.as_numpy_dtype()) 59 | 60 | # Example 5: printing your graph's definition 61 | my_const = tf.constant([1.0, 2.0], name='my_const') 62 | print(tf.get_default_graph().as_graph_def()) -------------------------------------------------------------------------------- /Tensorflow/standford_example/02_variables.py: -------------------------------------------------------------------------------- 1 | """ Variable exmaples 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 02 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | 10 | import numpy as np 11 | import tensorflow as tf 12 | 13 | # Example 1: creating variables 14 | s = tf.Variable(2, name='scalar') 15 | m = tf.Variable([[0, 1], [2, 3]], name='matrix') 16 | W = tf.Variable(tf.zeros([784,10]), name='big_matrix') 17 | V = tf.Variable(tf.truncated_normal([784, 10]), name='normal_matrix') 18 | 19 | s = tf.get_variable('scalar', initializer=tf.constant(2)) 20 | m = tf.get_variable('matrix', initializer=tf.constant([[0, 1], [2, 3]])) 21 | W = tf.get_variable('big_matrix', shape=(784, 10), initializer=tf.zeros_initializer()) 22 | V = tf.get_variable('normal_matrix', shape=(784, 10), initializer=tf.truncated_normal_initializer()) 23 | 24 | with tf.Session() as sess: 25 | sess.run(tf.global_variables_initializer()) 26 | print(V.eval()) 27 | 28 | # Example 2: assigning values to variables 29 | W = tf.Variable(10) 30 | W.assign(100) 31 | with tf.Session() as sess: 32 | sess.run(W.initializer) 33 | print(sess.run(W)) # >> 10 34 | 35 | W = tf.Variable(10) 36 | assign_op = W.assign(100) 37 | with tf.Session() as sess: 38 | sess.run(assign_op) 39 | print(W.eval()) # >> 100 40 | 41 | # create a variable whose original value is 2 42 | a = tf.get_variable('scalar', initializer=tf.constant(2)) 43 | a_times_two = a.assign(a * 2) 44 | with tf.Session() as sess: 45 | sess.run(tf.global_variables_initializer()) 46 | sess.run(a_times_two) # >> 4 47 | sess.run(a_times_two) # >> 8 48 | sess.run(a_times_two) # >> 16 49 | 50 | W = tf.Variable(10) 51 | with tf.Session() as sess: 52 | sess.run(W.initializer) 53 | print(sess.run(W.assign_add(10))) # >> 20 54 | print(sess.run(W.assign_sub(2))) # >> 18 55 | 56 | # Example 3: Each session has its own copy of variable 57 | W = tf.Variable(10) 58 | sess1 = tf.Session() 59 | sess2 = tf.Session() 60 | sess1.run(W.initializer) 61 | sess2.run(W.initializer) 62 | print(sess1.run(W.assign_add(10))) # >> 20 63 | print(sess2.run(W.assign_sub(2))) # >> 8 64 | print(sess1.run(W.assign_add(100))) # >> 120 65 | print(sess2.run(W.assign_sub(50))) # >> -42 66 | sess1.close() 67 | sess2.close() 68 | 69 | # Example 4: create a variable with the initial value depending on another variable 70 | W = tf.Variable(tf.truncated_normal([700, 10])) 71 | U = tf.Variable(W * 2) -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_linreg_dataset.py: -------------------------------------------------------------------------------- 1 | """ Solution for simple linear regression example using tf.data 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 03 
6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | import time 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | import tensorflow as tf 14 | 15 | import utils 16 | 17 | DATA_FILE = 'data/birth_life_2010.txt' 18 | 19 | # Step 1: read in the data 20 | data, n_samples = utils.read_birth_life_data(DATA_FILE) 21 | 22 | # Step 2: create Dataset and iterator 23 | dataset = tf.data.Dataset.from_tensor_slices((data[:,0], data[:,1])) 24 | 25 | iterator = dataset.make_initializable_iterator() 26 | X, Y = iterator.get_next() 27 | 28 | # Step 3: create weight and bias, initialized to 0 29 | w = tf.get_variable('weights', initializer=tf.constant(0.0)) 30 | b = tf.get_variable('bias', initializer=tf.constant(0.0)) 31 | 32 | # Step 4: build model to predict Y 33 | Y_predicted = X * w + b 34 | 35 | # Step 5: use the square error as the loss function 36 | loss = tf.square(Y - Y_predicted, name='loss') 37 | # loss = utils.huber_loss(Y, Y_predicted) 38 | 39 | # Step 6: using gradient descent with learning rate of 0.001 to minimize loss 40 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss) 41 | 42 | start = time.time() 43 | with tf.Session() as sess: 44 | # Step 7: initialize the necessary variables, in this case, w and b 45 | sess.run(tf.global_variables_initializer()) 46 | writer = tf.summary.FileWriter('./graphs/linear_reg', sess.graph) 47 | 48 | # Step 8: train the model for 100 epochs 49 | for i in range(100): 50 | sess.run(iterator.initializer) # initialize the iterator 51 | total_loss = 0 52 | try: 53 | while True: 54 | _, l = sess.run([optimizer, loss]) 55 | total_loss += l 56 | except tf.errors.OutOfRangeError: 57 | pass 58 | 59 | print('Epoch {0}: {1}'.format(i, total_loss/n_samples)) 60 | 61 | # close the writer when you're done using it 62 | writer.close() 63 | 64 | # Step 9: output the values of w and b 65 | w_out, b_out = sess.run([w, b]) 66 | print('w: %f, b: %f' %(w_out, b_out)) 67 | print('Took: %f seconds' %(time.time() - start)) 68 | 69 | # plot the results 70 | plt.plot(data[:,0], data[:,1], 'bo', label='Real data') 71 | plt.plot(data[:,0], data[:,0] * w_out + b_out, 'r', label='Predicted data with squared error') 72 | # plt.plot(data[:,0], data[:,0] * (-5.883589) + 85.124306, 'g', label='Predicted data with Huber loss') 73 | plt.legend() 74 | plt.show() -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_linreg_placeholder.py: -------------------------------------------------------------------------------- 1 | """ Solution for simple linear regression example using placeholders 2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 03 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | import time 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | import tensorflow as tf 14 | 15 | import utils 16 | 17 | DATA_FILE = 'data/birth_life_2010.txt' 18 | 19 | # Step 1: read in data from the .txt file 20 | data, n_samples = utils.read_birth_life_data(DATA_FILE) 21 | 22 | # Step 2: create placeholders for X (birth rate) and Y (life expectancy) 23 | X = tf.placeholder(tf.float32, name='X') 24 | Y = tf.placeholder(tf.float32, name='Y') 25 | 26 | # Step 3: create weight and bias, initialized to 0 27 | w = tf.get_variable('weights', initializer=tf.constant(0.0)) 28 | b = tf.get_variable('bias', initializer=tf.constant(0.0)) 29 | 30 | # Step 4: build 
model to predict Y 31 | Y_predicted = w * X + b 32 | 33 | # Step 5: use the squared error as the loss function 34 | # you can use either mean squared error or Huber loss 35 | loss = tf.square(Y - Y_predicted, name='loss') 36 | # loss = utils.huber_loss(Y, Y_predicted) 37 | 38 | # Step 6: using gradient descent with learning rate of 0.001 to minimize loss 39 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss) 40 | 41 | 42 | start = time.time() 43 | writer = tf.summary.FileWriter('./graphs/linear_reg', tf.get_default_graph()) 44 | with tf.Session() as sess: 45 | # Step 7: initialize the necessary variables, in this case, w and b 46 | sess.run(tf.global_variables_initializer()) 47 | 48 | # Step 8: train the model for 100 epochs 49 | for i in range(100): 50 | total_loss = 0 51 | for x, y in data: 52 | # Session execute optimizer and fetch values of loss 53 | _, l = sess.run([optimizer, loss], feed_dict={X: x, Y:y}) 54 | total_loss += l 55 | print('Epoch {0}: {1}'.format(i, total_loss/n_samples)) 56 | 57 | # close the writer when you're done using it 58 | writer.close() 59 | 60 | # Step 9: output the values of w and b 61 | w_out, b_out = sess.run([w, b]) 62 | 63 | print('Took: %f seconds' %(time.time() - start)) 64 | 65 | # plot the results 66 | plt.plot(data[:,0], data[:,1], 'bo', label='Real data') 67 | plt.plot(data[:,0], data[:,0] * w_out + b_out, 'r', label='Predicted data') 68 | plt.legend() 69 | plt.show() -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_linreg_starter.py: -------------------------------------------------------------------------------- 1 | """ Starter code for simple linear regression example using placeholders 2 | Created by Chip Huyen (huyenn@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 03 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | import time 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | import tensorflow as tf 14 | 15 | import utils 16 | 17 | DATA_FILE = 'data/birth_life_2010.txt' 18 | 19 | # Step 1: read in data from the .txt file 20 | data, n_samples = utils.read_birth_life_data(DATA_FILE) 21 | 22 | # Step 2: create placeholders for X (birth rate) and Y (life expectancy) 23 | # Remember both X and Y are scalars with type float 24 | X, Y = None, None 25 | ############################# 26 | ########## TO DO ############ 27 | ############################# 28 | 29 | # Step 3: create weight and bias, initialized to 0.0 30 | # Make sure to use tf.get_variable 31 | w, b = None, None 32 | ############################# 33 | ########## TO DO ############ 34 | ############################# 35 | 36 | # Step 4: build model to predict Y 37 | # e.g. 
how would you derive at Y_predicted given X, w, and b 38 | Y_predicted = None 39 | ############################# 40 | ########## TO DO ############ 41 | ############################# 42 | 43 | # Step 5: use the square error as the loss function 44 | loss = None 45 | ############################# 46 | ########## TO DO ############ 47 | ############################# 48 | 49 | # Step 6: using gradient descent with learning rate of 0.001 to minimize loss 50 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss) 51 | 52 | start = time.time() 53 | 54 | # Create a filewriter to write the model's graph to TensorBoard 55 | ############################# 56 | ########## TO DO ############ 57 | ############################# 58 | 59 | with tf.Session() as sess: 60 | # Step 7: initialize the necessary variables, in this case, w and b 61 | ############################# 62 | ########## TO DO ############ 63 | ############################# 64 | 65 | # Step 8: train the model for 100 epochs 66 | for i in range(100): 67 | total_loss = 0 68 | for x, y in data: 69 | # Execute train_op and get the value of loss. 70 | # Don't forget to feed in data for placeholders 71 | _, loss = ########## TO DO ############ 72 | total_loss += loss 73 | 74 | print('Epoch {0}: {1}'.format(i, total_loss/n_samples)) 75 | 76 | # close the writer when you're done using it 77 | ############################# 78 | ########## TO DO ############ 79 | ############################# 80 | writer.close() 81 | 82 | # Step 9: output the values of w and b 83 | w_out, b_out = None, None 84 | ############################# 85 | ########## TO DO ############ 86 | ############################# 87 | 88 | print('Took: %f seconds' %(time.time() - start)) 89 | 90 | # uncomment the following lines to see the plot 91 | # plt.plot(data[:,0], data[:,1], 'bo', label='Real data') 92 | # plt.plot(data[:,0], data[:,0] * w_out + b_out, 'r', label='Predicted data') 93 | # plt.legend() 94 | # plt.show() -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_logreg.py: -------------------------------------------------------------------------------- 1 | """ Solution for simple logistic regression model for MNIST 2 | with tf.data module 3 | MNIST dataset: yann.lecun.com/exdb/mnist/ 4 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 5 | CS20: "TensorFlow for Deep Learning Research" 6 | cs20.stanford.edu 7 | Lecture 03 8 | """ 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | import time 15 | 16 | import utils 17 | 18 | # Define paramaters for the model 19 | learning_rate = 0.01 20 | batch_size = 128 21 | n_epochs = 30 22 | n_train = 60000 23 | n_test = 10000 24 | 25 | # Step 1: Read in data 26 | mnist_folder = 'data/mnist' 27 | utils.download_mnist(mnist_folder) 28 | train, val, test = utils.read_mnist(mnist_folder, flatten=True) 29 | 30 | # Step 2: Create datasets and iterator 31 | train_data = tf.data.Dataset.from_tensor_slices(train) 32 | train_data = train_data.shuffle(10000) # if you want to shuffle your data 33 | train_data = train_data.batch(batch_size) 34 | 35 | test_data = tf.data.Dataset.from_tensor_slices(test) 36 | test_data = test_data.batch(batch_size) 37 | 38 | iterator = tf.data.Iterator.from_structure(train_data.output_types, 39 | train_data.output_shapes) 40 | img, label = iterator.get_next() 41 | 42 | train_init = iterator.make_initializer(train_data) # initializer for train_data 43 | 
test_init = iterator.make_initializer(test_data) # initializer for train_data 44 | 45 | # Step 3: create weights and bias 46 | # w is initialized to random variables with mean of 0, stddev of 0.01 47 | # b is initialized to 0 48 | # shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w) 49 | # shape of b depends on Y 50 | w = tf.get_variable(name='weights', shape=(784, 10), initializer=tf.random_normal_initializer(0, 0.01)) 51 | b = tf.get_variable(name='bias', shape=(1, 10), initializer=tf.zeros_initializer()) 52 | 53 | # Step 4: build model 54 | # the model that returns the logits. 55 | # this logits will be later passed through softmax layer 56 | logits = tf.matmul(img, w) + b 57 | 58 | # Step 5: define loss function 59 | # use cross entropy of softmax of logits as the loss function 60 | entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=label, name='entropy') 61 | loss = tf.reduce_mean(entropy, name='loss') # computes the mean over all the examples in the batch 62 | 63 | # Step 6: define training op 64 | # using gradient descent with learning rate of 0.01 to minimize loss 65 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss) 66 | 67 | # Step 7: calculate accuracy with test set 68 | preds = tf.nn.softmax(logits) 69 | correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(label, 1)) 70 | accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32)) 71 | 72 | writer = tf.summary.FileWriter('./graphs/logreg', tf.get_default_graph()) 73 | with tf.Session() as sess: 74 | 75 | start_time = time.time() 76 | sess.run(tf.global_variables_initializer()) 77 | 78 | # train the model n_epochs times 79 | for i in range(n_epochs): 80 | sess.run(train_init) # drawing samples from train_data 81 | total_loss = 0 82 | n_batches = 0 83 | try: 84 | while True: 85 | _, l = sess.run([optimizer, loss]) 86 | total_loss += l 87 | n_batches += 1 88 | except tf.errors.OutOfRangeError: 89 | pass 90 | print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches)) 91 | print('Total time: {0} seconds'.format(time.time() - start_time)) 92 | 93 | # test the model 94 | sess.run(test_init) # drawing samples from test_data 95 | total_correct_preds = 0 96 | try: 97 | while True: 98 | accuracy_batch = sess.run(accuracy) 99 | total_correct_preds += accuracy_batch 100 | except tf.errors.OutOfRangeError: 101 | pass 102 | 103 | print('Accuracy {0}'.format(total_correct_preds/n_test)) 104 | writer.close() 105 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_logreg_placeholder.py: -------------------------------------------------------------------------------- 1 | """ Solution for simple logistic regression model for MNIST 2 | with placeholder 3 | MNIST dataset: yann.lecun.com/exdb/mnist/ 4 | Created by Chip Huyen (huyenn@cs.stanford.edu) 5 | CS20: "TensorFlow for Deep Learning Research" 6 | cs20.stanford.edu 7 | Lecture 03 8 | """ 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | from tensorflow.examples.tutorials.mnist import input_data 15 | import time 16 | 17 | import utils 18 | 19 | # Define paramaters for the model 20 | learning_rate = 0.01 21 | batch_size = 128 22 | n_epochs = 30 23 | 24 | # Step 1: Read in data 25 | # using TF Learn's built in function to load MNIST data to the folder data/mnist 26 | mnist = input_data.read_data_sets('data/mnist', one_hot=True) 27 | X_batch, Y_batch = mnist.train.next_batch(batch_size) 28 | 
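# Note: the next_batch call above only pulls one sample batch (with batch_size = 128 and the
# flattened MNIST images, X_batch has shape (128, 784) and Y_batch (128, 10)) so the data can be
# inspected; it is not reused -- the training loop below fetches a fresh batch on every step.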
29 | # Step 2: create placeholders for features and labels 30 | # each image in the MNIST data is of shape 28*28 = 784 31 | # therefore, each image is represented with a 1x784 tensor 32 | # there are 10 classes for each image, corresponding to digits 0 - 9. 33 | # each lable is one hot vector. 34 | X = tf.placeholder(tf.float32, [batch_size, 784], name='image') 35 | Y = tf.placeholder(tf.int32, [batch_size, 10], name='label') 36 | 37 | # Step 3: create weights and bias 38 | # w is initialized to random variables with mean of 0, stddev of 0.01 39 | # b is initialized to 0 40 | # shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w) 41 | # shape of b depends on Y 42 | w = tf.get_variable(name='weights', shape=(784, 10), initializer=tf.random_normal_initializer()) 43 | b = tf.get_variable(name='bias', shape=(1, 10), initializer=tf.zeros_initializer()) 44 | 45 | # Step 4: build model 46 | # the model that returns the logits. 47 | # this logits will be later passed through softmax layer 48 | logits = tf.matmul(X, w) + b 49 | 50 | # Step 5: define loss function 51 | # use cross entropy of softmax of logits as the loss function 52 | entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y, name='loss') 53 | loss = tf.reduce_mean(entropy) # computes the mean over all the examples in the batch 54 | # loss = tf.reduce_mean(-tf.reduce_sum(tf.nn.softmax(logits) * tf.log(Y), reduction_indices=[1])) 55 | 56 | # Step 6: define training op 57 | # using gradient descent with learning rate of 0.01 to minimize loss 58 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss) 59 | 60 | # Step 7: calculate accuracy with test set 61 | preds = tf.nn.softmax(logits) 62 | correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y, 1)) 63 | accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32)) 64 | 65 | writer = tf.summary.FileWriter('./graphs/logreg_placeholder', tf.get_default_graph()) 66 | with tf.Session() as sess: 67 | start_time = time.time() 68 | sess.run(tf.global_variables_initializer()) 69 | n_batches = int(mnist.train.num_examples/batch_size) 70 | 71 | # train the model n_epochs times 72 | for i in range(n_epochs): 73 | total_loss = 0 74 | 75 | for j in range(n_batches): 76 | X_batch, Y_batch = mnist.train.next_batch(batch_size) 77 | _, loss_batch = sess.run([optimizer, loss], {X: X_batch, Y:Y_batch}) 78 | total_loss += loss_batch 79 | print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches)) 80 | print('Total time: {0} seconds'.format(time.time() - start_time)) 81 | 82 | # test the model 83 | n_batches = int(mnist.test.num_examples/batch_size) 84 | total_correct_preds = 0 85 | 86 | for i in range(n_batches): 87 | X_batch, Y_batch = mnist.test.next_batch(batch_size) 88 | accuracy_batch = sess.run(accuracy, {X: X_batch, Y:Y_batch}) 89 | total_correct_preds += accuracy_batch 90 | 91 | print('Accuracy {0}'.format(total_correct_preds/mnist.test.num_examples)) 92 | 93 | writer.close() 94 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/03_logreg_starter.py: -------------------------------------------------------------------------------- 1 | """ Starter code for simple logistic regression model for MNIST 2 | with tf.data module 3 | MNIST dataset: yann.lecun.com/exdb/mnist/ 4 | Created by Chip Huyen (chiphuyen@cs.stanford.edu) 5 | CS20: "TensorFlow for Deep Learning Research" 6 | cs20.stanford.edu 7 | Lecture 03 8 | """ 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 
| import numpy as np 13 | import tensorflow as tf 14 | import time 15 | 16 | import utils 17 | 18 | # Define paramaters for the model 19 | learning_rate = 0.01 20 | batch_size = 128 21 | n_epochs = 30 22 | n_train = 60000 23 | n_test = 10000 24 | 25 | # Step 1: Read in data 26 | mnist_folder = 'data/mnist' 27 | utils.download_mnist(mnist_folder) 28 | train, val, test = utils.read_mnist(mnist_folder, flatten=True) 29 | 30 | # Step 2: Create datasets and iterator 31 | # create training Dataset and batch it 32 | train_data = tf.data.Dataset.from_tensor_slices(train) 33 | train_data = train_data.shuffle(10000) # if you want to shuffle your data 34 | train_data = train_data.batch(batch_size) 35 | 36 | # create testing Dataset and batch it 37 | test_data = None 38 | ############################# 39 | ########## TO DO ############ 40 | ############################# 41 | 42 | 43 | # create one iterator and initialize it with different datasets 44 | iterator = tf.data.Iterator.from_structure(train_data.output_types, 45 | train_data.output_shapes) 46 | img, label = iterator.get_next() 47 | 48 | train_init = iterator.make_initializer(train_data) # initializer for train_data 49 | test_init = iterator.make_initializer(test_data) # initializer for train_data 50 | 51 | # Step 3: create weights and bias 52 | # w is initialized to random variables with mean of 0, stddev of 0.01 53 | # b is initialized to 0 54 | # shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w) 55 | # shape of b depends on Y 56 | w, b = None, None 57 | ############################# 58 | ########## TO DO ############ 59 | ############################# 60 | 61 | 62 | # Step 4: build model 63 | # the model that returns the logits. 64 | # this logits will be later passed through softmax layer 65 | logits = None 66 | ############################# 67 | ########## TO DO ############ 68 | ############################# 69 | 70 | 71 | # Step 5: define loss function 72 | # use cross entropy of softmax of logits as the loss function 73 | loss = None 74 | ############################# 75 | ########## TO DO ############ 76 | ############################# 77 | 78 | 79 | # Step 6: define optimizer 80 | # using Adamn Optimizer with pre-defined learning rate to minimize loss 81 | optimizer = None 82 | ############################# 83 | ########## TO DO ############ 84 | ############################# 85 | 86 | 87 | # Step 7: calculate accuracy with test set 88 | preds = tf.nn.softmax(logits) 89 | correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(label, 1)) 90 | accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32)) 91 | 92 | writer = tf.summary.FileWriter('./graphs/logreg', tf.get_default_graph()) 93 | with tf.Session() as sess: 94 | 95 | start_time = time.time() 96 | sess.run(tf.global_variables_initializer()) 97 | 98 | # train the model n_epochs times 99 | for i in range(n_epochs): 100 | sess.run(train_init) # drawing samples from train_data 101 | total_loss = 0 102 | n_batches = 0 103 | try: 104 | while True: 105 | _, l = sess.run([optimizer, loss]) 106 | total_loss += l 107 | n_batches += 1 108 | except tf.errors.OutOfRangeError: 109 | pass 110 | print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches)) 111 | print('Total time: {0} seconds'.format(time.time() - start_time)) 112 | 113 | # test the model 114 | sess.run(test_init) # drawing samples from test_data 115 | total_correct_preds = 0 116 | try: 117 | while True: 118 | accuracy_batch = sess.run(accuracy) 119 | total_correct_preds += accuracy_batch 120 
| except tf.errors.OutOfRangeError: 121 | pass 122 | 123 | print('Accuracy {0}'.format(total_correct_preds/n_test)) 124 | writer.close() -------------------------------------------------------------------------------- /Tensorflow/standford_example/04_linreg_eager.py: -------------------------------------------------------------------------------- 1 | """ Starter code for a simple regression example using eager execution. 2 | Created by Akshay Agrawal (akshayka@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 04 6 | """ 7 | import time 8 | 9 | import tensorflow as tf 10 | import tensorflow.contrib.eager as tfe 11 | import matplotlib.pyplot as plt 12 | 13 | import utils 14 | 15 | DATA_FILE = 'data/birth_life_2010.txt' 16 | 17 | # In order to use eager execution, `tfe.enable_eager_execution()` must be 18 | # called at the very beginning of a TensorFlow program. 19 | tfe.enable_eager_execution() 20 | 21 | # Read the data into a dataset. 22 | data, n_samples = utils.read_birth_life_data(DATA_FILE) 23 | dataset = tf.data.Dataset.from_tensor_slices((data[:,0], data[:,1])) 24 | 25 | # Create variables. 26 | w = tfe.Variable(0.0) 27 | b = tfe.Variable(0.0) 28 | 29 | # Define the linear predictor. 30 | def prediction(x): 31 | return x * w + b 32 | 33 | # Define loss functions of the form: L(y, y_predicted) 34 | def squared_loss(y, y_predicted): 35 | return (y - y_predicted) ** 2 36 | 37 | def huber_loss(y, y_predicted, m=1.0): 38 | """Huber loss.""" 39 | t = y - y_predicted 40 | # Note that enabling eager execution lets you use Python control flow and 41 | # specificy dynamic TensorFlow computations. Contrast this implementation 42 | # to the graph-construction one found in `utils`, which uses `tf.cond`. 43 | return t ** 2 if tf.abs(t) <= m else m * (2 * tf.abs(t) - m) 44 | 45 | def train(loss_fn): 46 | """Train a regression model evaluated using `loss_fn`.""" 47 | print('Training; loss function: ' + loss_fn.__name__) 48 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) 49 | 50 | # Define the function through which to differentiate. 51 | def loss_for_example(x, y): 52 | return loss_fn(y, prediction(x)) 53 | 54 | # `grad_fn(x_i, y_i)` returns (1) the value of `loss_for_example` 55 | # evaluated at `x_i`, `y_i` and (2) the gradients of any variables used in 56 | # calculating it. 57 | grad_fn = tfe.implicit_value_and_gradients(loss_for_example) 58 | 59 | start = time.time() 60 | for epoch in range(100): 61 | total_loss = 0.0 62 | for x_i, y_i in tfe.Iterator(dataset): 63 | loss, gradients = grad_fn(x_i, y_i) 64 | # Take an optimization step and update variables. 65 | optimizer.apply_gradients(gradients) 66 | total_loss += loss 67 | if epoch % 10 == 0: 68 | print('Epoch {0}: {1}'.format(epoch, total_loss / n_samples)) 69 | print('Took: %f seconds' % (time.time() - start)) 70 | print('Eager execution exhibits significant overhead per operation. ' 71 | 'As you increase your batch size, the impact of the overhead will ' 72 | 'become less noticeable. Eager execution is under active development: ' 73 | 'expect performance to increase substantially in the near future!') 74 | 75 | train(huber_loss) 76 | plt.plot(data[:,0], data[:,1], 'bo') 77 | # The `.numpy()` method of a tensor retrieves the NumPy array backing it. 78 | # In future versions of eager, you won't need to call `.numpy()` and will 79 | # instead be able to, in most cases, pass Tensors wherever NumPy arrays are 80 | # expected. 
81 | plt.plot(data[:,0], data[:,0] * w.numpy() + b.numpy(), 'r', 82 | label="huber regression") 83 | plt.legend() 84 | plt.show() 85 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/04_linreg_eager_starter.py: -------------------------------------------------------------------------------- 1 | """ Starter code for a simple regression example using eager execution. 2 | Created by Akshay Agrawal (akshayka@cs.stanford.edu) 3 | CS20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Lecture 04 6 | """ 7 | import time 8 | 9 | import tensorflow as tf 10 | import tensorflow.contrib.eager as tfe 11 | import matplotlib.pyplot as plt 12 | 13 | import utils 14 | 15 | DATA_FILE = 'data/birth_life_2010.txt' 16 | 17 | # In order to use eager execution, `tfe.enable_eager_execution()` must be 18 | # called at the very beginning of a TensorFlow program. 19 | ############################# 20 | ########## TO DO ############ 21 | ############################# 22 | 23 | # Read the data into a dataset. 24 | data, n_samples = utils.read_birth_life_data(DATA_FILE) 25 | dataset = tf.data.Dataset.from_tensor_slices((data[:,0], data[:,1])) 26 | 27 | # Create weight and bias variables, initialized to 0.0. 28 | ############################# 29 | ########## TO DO ############ 30 | ############################# 31 | w = None 32 | b = None 33 | 34 | # Define the linear predictor. 35 | def prediction(x): 36 | ############################# 37 | ########## TO DO ############ 38 | ############################# 39 | pass 40 | 41 | # Define loss functions of the form: L(y, y_predicted) 42 | def squared_loss(y, y_predicted): 43 | ############################# 44 | ########## TO DO ############ 45 | ############################# 46 | pass 47 | 48 | def huber_loss(y, y_predicted): 49 | """Huber loss with `m` set to `1.0`.""" 50 | ############################# 51 | ########## TO DO ############ 52 | ############################# 53 | pass 54 | 55 | def train(loss_fn): 56 | """Train a regression model evaluated using `loss_fn`.""" 57 | print('Training; loss function: ' + loss_fn.__name__) 58 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) 59 | 60 | # Define the function through which to differentiate. 61 | ############################# 62 | ########## TO DO ############ 63 | ############################# 64 | def loss_for_example(x, y): 65 | pass 66 | 67 | # Obtain a gradients function using `tfe.implicit_value_and_gradients`. 68 | ############################# 69 | ########## TO DO ############ 70 | ############################# 71 | grad_fn = None 72 | 73 | start = time.time() 74 | for epoch in range(100): 75 | total_loss = 0.0 76 | for x_i, y_i in tfe.Iterator(dataset): 77 | # Compute the loss and gradient, and take an optimization step. 78 | ############################# 79 | ########## TO DO ############ 80 | ############################# 81 | optimizer.apply_gradients(gradients) 82 | total_loss += loss 83 | if epoch % 10 == 0: 84 | print('Epoch {0}: {1}'.format(epoch, total_loss / n_samples)) 85 | print('Took: %f seconds' % (time.time() - start)) 86 | print('Eager execution exhibits significant overhead per operation. ' 87 | 'As you increase your batch size, the impact of the overhead will ' 88 | 'become less noticeable. 
Eager execution is under active development: ' 89 | 'expect performance to increase substantially in the near future!') 90 | 91 | train(huber_loss) 92 | plt.plot(data[:,0], data[:,1], 'bo') 93 | # The `.numpy()` method of a tensor retrieves the NumPy array backing it. 94 | # In future versions of eager, you won't need to call `.numpy()` and will 95 | # instead be able to, in most cases, pass Tensors wherever NumPy arrays are 96 | # expected. 97 | plt.plot(data[:,0], data[:,0] * w.numpy() + b.numpy(), 'r', 98 | label="huber regression") 99 | plt.legend() 100 | plt.show() 101 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/04_word2vec.py: -------------------------------------------------------------------------------- 1 | """ starter code for word2vec skip-gram model with NCE loss 2 | CS 20: "TensorFlow for Deep Learning Research" 3 | cs20.stanford.edu 4 | Chip Huyen (chiphuyen@cs.stanford.edu) 5 | Lecture 04 6 | """ 7 | 8 | import os 9 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 10 | 11 | import numpy as np 12 | from tensorflow.contrib.tensorboard.plugins import projector 13 | import tensorflow as tf 14 | 15 | import utils 16 | import word2vec_utils 17 | 18 | # Model hyperparameters 19 | VOCAB_SIZE = 50000 20 | BATCH_SIZE = 128 21 | EMBED_SIZE = 128 # dimension of the word embedding vectors 22 | SKIP_WINDOW = 1 # the context window 23 | NUM_SAMPLED = 64 # number of negative examples to sample 24 | LEARNING_RATE = 1.0 25 | NUM_TRAIN_STEPS = 100000 26 | VISUAL_FLD = 'visualization' 27 | SKIP_STEP = 5000 28 | 29 | # Parameters for downloading data 30 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip' 31 | EXPECTED_BYTES = 31344016 32 | NUM_VISUALIZE = 3000 # number of tokens to visualize 33 | 34 | 35 | def word2vec(dataset): 36 | """ Build the graph for word2vec model and train it """ 37 | # Step 1: get input, output from the dataset 38 | with tf.name_scope('data'): 39 | iterator = dataset.make_initializable_iterator() 40 | center_words, target_words = iterator.get_next() 41 | 42 | """ Step 2 + 3: define weights and embedding lookup. 
43 | In word2vec, it's actually the weights that we care about 44 | """ 45 | with tf.name_scope('embed'): 46 | embed_matrix = tf.get_variable('embed_matrix', 47 | shape=[VOCAB_SIZE, EMBED_SIZE], 48 | initializer=tf.random_uniform_initializer()) 49 | embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embedding') 50 | 51 | # Step 4: construct variables for NCE loss and define loss function 52 | with tf.name_scope('loss'): 53 | nce_weight = tf.get_variable('nce_weight', shape=[VOCAB_SIZE, EMBED_SIZE], 54 | initializer=tf.truncated_normal_initializer(stddev=1.0 / (EMBED_SIZE ** 0.5))) 55 | nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE])) 56 | 57 | # define loss function to be NCE loss function 58 | loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight, 59 | biases=nce_bias, 60 | labels=target_words, 61 | inputs=embed, 62 | num_sampled=NUM_SAMPLED, 63 | num_classes=VOCAB_SIZE), name='loss') 64 | 65 | # Step 5: define optimizer 66 | with tf.name_scope('optimizer'): 67 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss) 68 | 69 | utils.safe_mkdir('checkpoints') 70 | 71 | with tf.Session() as sess: 72 | sess.run(iterator.initializer) 73 | sess.run(tf.global_variables_initializer()) 74 | 75 | total_loss = 0.0 # we use this to calculate late average loss in the last SKIP_STEP steps 76 | writer = tf.summary.FileWriter('graphs/word2vec_simple', sess.graph) 77 | 78 | for index in range(NUM_TRAIN_STEPS): 79 | try: 80 | loss_batch, _ = sess.run([loss, optimizer]) 81 | total_loss += loss_batch 82 | if (index + 1) % SKIP_STEP == 0: 83 | print('Average loss at step {}: {:5.1f}'.format(index, total_loss / SKIP_STEP)) 84 | total_loss = 0.0 85 | except tf.errors.OutOfRangeError: 86 | sess.run(iterator.initializer) 87 | writer.close() 88 | 89 | def gen(): 90 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES, VOCAB_SIZE, 91 | BATCH_SIZE, SKIP_WINDOW, VISUAL_FLD) 92 | 93 | def main(): 94 | dataset = tf.data.Dataset.from_generator(gen, 95 | (tf.int32, tf.int32), 96 | (tf.TensorShape([BATCH_SIZE]), tf.TensorShape([BATCH_SIZE, 1]))) 97 | word2vec(dataset) 98 | 99 | if __name__ == '__main__': 100 | main() 101 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/04_word2vec_eager.py: -------------------------------------------------------------------------------- 1 | """ starter code for word2vec skip-gram model with NCE loss 2 | Eager execution 3 | CS 20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Chip Huyen (chiphuyen@cs.stanford.edu) & Akshay Agrawal (akshayka@cs.stanford.edu) 6 | Lecture 04 7 | """ 8 | 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | import tensorflow.contrib.eager as tfe 15 | 16 | import utils 17 | import word2vec_utils 18 | 19 | tfe.enable_eager_execution() 20 | 21 | # Model hyperparameters 22 | VOCAB_SIZE = 50000 23 | BATCH_SIZE = 128 24 | EMBED_SIZE = 128 # dimension of the word embedding vectors 25 | SKIP_WINDOW = 1 # the context window 26 | NUM_SAMPLED = 64 # number of negative examples to sample 27 | LEARNING_RATE = 1.0 28 | NUM_TRAIN_STEPS = 100000 29 | VISUAL_FLD = 'visualization' 30 | SKIP_STEP = 5000 31 | 32 | # Parameters for downloading data 33 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip' 34 | EXPECTED_BYTES = 31344016 35 | 36 | class Word2Vec(object): 37 | def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED): 38 | 
self.vocab_size = vocab_size 39 | self.num_sampled = num_sampled 40 | self.embed_matrix = tfe.Variable(tf.random_uniform( 41 | [vocab_size, embed_size])) 42 | self.nce_weight = tfe.Variable(tf.truncated_normal( 43 | [vocab_size, embed_size], 44 | stddev=1.0 / (embed_size ** 0.5))) 45 | self.nce_bias = tfe.Variable(tf.zeros([vocab_size])) 46 | 47 | def compute_loss(self, center_words, target_words): 48 | """Computes the forward pass of word2vec with the NCE loss.""" 49 | embed = tf.nn.embedding_lookup(self.embed_matrix, center_words) 50 | loss = tf.reduce_mean(tf.nn.nce_loss(weights=self.nce_weight, 51 | biases=self.nce_bias, 52 | labels=target_words, 53 | inputs=embed, 54 | num_sampled=self.num_sampled, 55 | num_classes=self.vocab_size)) 56 | return loss 57 | 58 | 59 | def gen(): 60 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES, 61 | VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW, 62 | VISUAL_FLD) 63 | 64 | def main(): 65 | dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32), 66 | (tf.TensorShape([BATCH_SIZE]), 67 | tf.TensorShape([BATCH_SIZE, 1]))) 68 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE) 69 | model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE) 70 | grad_fn = tfe.implicit_value_and_gradients(model.compute_loss) 71 | total_loss = 0.0 # for average loss in the last SKIP_STEP steps 72 | num_train_steps = 0 73 | while num_train_steps < NUM_TRAIN_STEPS: 74 | for center_words, target_words in tfe.Iterator(dataset): 75 | if num_train_steps >= NUM_TRAIN_STEPS: 76 | break 77 | loss_batch, grads = grad_fn(center_words, target_words) 78 | total_loss += loss_batch 79 | optimizer.apply_gradients(grads) 80 | if (num_train_steps + 1) % SKIP_STEP == 0: 81 | print('Average loss at step {}: {:5.1f}'.format( 82 | num_train_steps, total_loss / SKIP_STEP)) 83 | total_loss = 0.0 84 | num_train_steps += 1 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/04_word2vec_eager_starter.py: -------------------------------------------------------------------------------- 1 | """ starter code for word2vec skip-gram model with NCE loss 2 | Eager execution 3 | CS 20: "TensorFlow for Deep Learning Research" 4 | cs20.stanford.edu 5 | Chip Huyen (chiphuyen@cs.stanford.edu) & Akshay Agrawal (akshayka@cs.stanford.edu) 6 | Lecture 04 7 | """ 8 | 9 | import os 10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | import tensorflow.contrib.eager as tfe 15 | 16 | import utils 17 | import word2vec_utils 18 | 19 | # Enable eager execution! 
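# Hint: the completed 04_word2vec_eager.py above does this by calling
# tfe.enable_eager_execution() immediately after the imports, before any ops are created.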
20 | ############################# 21 | ########## TO DO ############ 22 | ############################# 23 | 24 | # Model hyperparameters 25 | VOCAB_SIZE = 50000 26 | BATCH_SIZE = 128 27 | EMBED_SIZE = 128 # dimension of the word embedding vectors 28 | SKIP_WINDOW = 1 # the context window 29 | NUM_SAMPLED = 64 # number of negative examples to sample 30 | LEARNING_RATE = 1.0 31 | NUM_TRAIN_STEPS = 100000 32 | VISUAL_FLD = 'visualization' 33 | SKIP_STEP = 5000 34 | 35 | # Parameters for downloading data 36 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip' 37 | EXPECTED_BYTES = 31344016 38 | 39 | class Word2Vec(object): 40 | def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED): 41 | self.vocab_size = vocab_size 42 | self.num_sampled = num_sampled 43 | # Create the variables: an embedding matrix, nce_weight, and nce_bias 44 | ############################# 45 | ########## TO DO ############ 46 | ############################# 47 | self.embed_matrix = None 48 | self.nce_weight = None 49 | self.nce_bias = None 50 | 51 | def compute_loss(self, center_words, target_words): 52 | """Computes the forward pass of word2vec with the NCE loss.""" 53 | # Look up the embeddings for the center words 54 | ############################# 55 | ########## TO DO ############ 56 | ############################# 57 | embed = None 58 | 59 | # Compute the loss, using tf.reduce_mean and tf.nn.nce_loss 60 | ############################# 61 | ########## TO DO ############ 62 | ############################# 63 | loss = None 64 | return loss 65 | 66 | 67 | def gen(): 68 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES, 69 | VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW, 70 | VISUAL_FLD) 71 | 72 | def main(): 73 | dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32), 74 | (tf.TensorShape([BATCH_SIZE]), 75 | tf.TensorShape([BATCH_SIZE, 1]))) 76 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE) 77 | # Create the model 78 | ############################# 79 | ########## TO DO ############ 80 | ############################# 81 | model = None 82 | 83 | # Create the gradients function, using `tfe.implicit_value_and_gradients` 84 | ############################# 85 | ########## TO DO ############ 86 | ############################# 87 | grad_fn = None 88 | 89 | total_loss = 0.0 # for average loss in the last SKIP_STEP steps 90 | num_train_steps = 0 91 | while num_train_steps < NUM_TRAIN_STEPS: 92 | for center_words, target_words in tfe.Iterator(dataset): 93 | if num_train_steps >= NUM_TRAIN_STEPS: 94 | break 95 | 96 | # Compute the loss and gradients, and take an optimization step. 
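# Hint: the completed 04_word2vec_eager.py above performs this step as
#   loss_batch, grads = grad_fn(center_words, target_words)
#   total_loss += loss_batch
#   optimizer.apply_gradients(grads)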
97 | ############################# 98 | ########## TO DO ############ 99 | ############################# 100 | 101 | if (num_train_steps + 1) % SKIP_STEP == 0: 102 | print('Average loss at step {}: {:5.1f}'.format( 103 | num_train_steps, total_loss / SKIP_STEP)) 104 | total_loss = 0.0 105 | num_train_steps += 1 106 | 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/05_randomization.py: -------------------------------------------------------------------------------- 1 | """ Examples to demonstrate ops level randomization 2 | CS 20: "TensorFlow for Deep Learning Research" 3 | cs20.stanford.edu 4 | Chip Huyen (chiphuyen@cs.stanford.edu) 5 | Lecture 05 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | 10 | import tensorflow as tf 11 | 12 | # Example 1: session keeps track of the random state 13 | c = tf.random_uniform([], -10, 10, seed=2) 14 | 15 | with tf.Session() as sess: 16 | print(sess.run(c)) # >> 3.574932 17 | print(sess.run(c)) # >> -5.9731865 18 | 19 | # Example 2: each new session will start the random state all over again. 20 | c = tf.random_uniform([], -10, 10, seed=2) 21 | 22 | with tf.Session() as sess: 23 | print(sess.run(c)) # >> 3.574932 24 | 25 | with tf.Session() as sess: 26 | print(sess.run(c)) # >> 3.574932 27 | 28 | # Example 3: with operation level random seed, each op keeps its own seed. 29 | c = tf.random_uniform([], -10, 10, seed=2) 30 | d = tf.random_uniform([], -10, 10, seed=2) 31 | 32 | with tf.Session() as sess: 33 | print(sess.run(c)) # >> 3.574932 34 | print(sess.run(d)) # >> 3.574932 35 | 36 | # Example 4: graph level random seed 37 | tf.set_random_seed(2) 38 | c = tf.random_uniform([], -10, 10) 39 | d = tf.random_uniform([], -10, 10) 40 | 41 | with tf.Session() as sess: 42 | print(sess.run(c)) # >> 9.123926 43 | print(sess.run(d)) # >> -4.5340395 44 | -------------------------------------------------------------------------------- /Tensorflow/standford_example/05_variable_sharing.py: -------------------------------------------------------------------------------- 1 | """ Examples to demonstrate variable sharing 2 | CS 20: 'TensorFlow for Deep Learning Research' 3 | cs20.stanford.edu 4 | Chip Huyen (chiphuyen@cs.stanford.edu) 5 | Lecture 05 6 | """ 7 | import os 8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 9 | 10 | import tensorflow as tf 11 | 12 | x1 = tf.truncated_normal([200, 100], name='x1') 13 | x2 = tf.truncated_normal([200, 100], name='x2') 14 | 15 | def two_hidden_layers(x): 16 | assert x.shape.as_list() == [200, 100] 17 | w1 = tf.Variable(tf.random_normal([100, 50]), name='h1_weights') 18 | b1 = tf.Variable(tf.zeros([50]), name='h1_biases') 19 | h1 = tf.matmul(x, w1) + b1 20 | assert h1.shape.as_list() == [200, 50] 21 | w2 = tf.Variable(tf.random_normal([50, 10]), name='h2_weights') 22 | b2 = tf.Variable(tf.zeros([10]), name='2_biases') 23 | logits = tf.matmul(h1, w2) + b2 24 | return logits 25 | 26 | def two_hidden_layers_2(x): 27 | assert x.shape.as_list() == [200, 100] 28 | w1 = tf.get_variable('h1_weights', [100, 50], initializer=tf.random_normal_initializer()) 29 | b1 = tf.get_variable('h1_biases', [50], initializer=tf.constant_initializer(0.0)) 30 | h1 = tf.matmul(x, w1) + b1 31 | assert h1.shape.as_list() == [200, 50] 32 | w2 = tf.get_variable('h2_weights', [50, 10], initializer=tf.random_normal_initializer()) 33 | b2 = tf.get_variable('h2_biases', [10], initializer=tf.constant_initializer(0.0)) 34 | logits = 
tf.matmul(h1, w2) + b2 35 | return logits 36 | 37 | # logits1 = two_hidden_layers(x1) 38 | # logits2 = two_hidden_layers(x2) 39 | 40 | # logits1 = two_hidden_layers_2(x1) 41 | # logits2 = two_hidden_layers_2(x2) 42 | 43 | # with tf.variable_scope('two_layers') as scope: 44 | # logits1 = two_hidden_layers_2(x1) 45 | # scope.reuse_variables() 46 | # logits2 = two_hidden_layers_2(x2) 47 | 48 | # with tf.variable_scope('two_layers') as scope: 49 | # logits1 = two_hidden_layers_2(x1) 50 | # scope.reuse_variables() 51 | # logits2 = two_hidden_layers_2(x2) 52 | 53 | def fully_connected(x, output_dim, scope): 54 | with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope: 55 | w = tf.get_variable('weights', [x.shape[1], output_dim], initializer=tf.random_normal_initializer()) 56 | b = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0)) 57 | return tf.matmul(x, w) + b 58 | 59 | def two_hidden_layers(x): 60 | h1 = fully_connected(x, 50, 'h1') 61 | h2 = fully_connected(h1, 10, 'h2') 62 | 63 | with tf.variable_scope('two_layers') as scope: 64 | logits1 = two_hidden_layers(x1) 65 | # scope.reuse_variables() 66 | logits2 = two_hidden_layers(x2) 67 | 68 | writer = tf.summary.FileWriter('./graphs/cool_variables', tf.get_default_graph()) 69 | writer.close() -------------------------------------------------------------------------------- /Tensorflow/standford_example/07_run_kernels.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple examples of convolution to do some basic filters 3 | Also demonstrates the use of TensorFlow data readers. 4 | 5 | We will use some popular filters for our image. 6 | It seems to be working with grayscale images, but not with rgb images. 7 | It's probably because I didn't choose the right kernels for rgb images. 8 | 9 | kernels for rgb images have dimensions 3 x 3 x 3 x 3 10 | kernels for grayscale images have dimensions 3 x 3 x 1 x 1 11 | 12 | CS 20: "TensorFlow for Deep Learning Research" 13 | cs20.stanford.edu 14 | Chip Huyen (chiphuyen@cs.stanford.edu) 15 | Lecture 07 16 | """ 17 | import os 18 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 19 | 20 | import sys 21 | sys.path.append('..') 22 | 23 | from matplotlib import gridspec as gridspec 24 | from matplotlib import pyplot as plt 25 | import tensorflow as tf 26 | 27 | import kernels 28 | 29 | def read_one_image(filename): 30 | ''' This method is to show how to read image from a file into a tensor. 31 | The output is a tensor object. 
32 | ''' 33 | image_string = tf.read_file(filename) 34 | image_decoded = tf.image.decode_image(image_string) 35 | image = tf.cast(image_decoded, tf.float32) / 256.0 36 | return image 37 | 38 | def convolve(image, kernels, rgb=True, strides=[1, 3, 3, 1], padding='SAME'): 39 | images = [image[0]] 40 | for i, kernel in enumerate(kernels): 41 | filtered_image = tf.nn.conv2d(image, 42 | kernel, 43 | strides=strides, 44 | padding=padding)[0] 45 | if i == 2: 46 | filtered_image = tf.minimum(tf.nn.relu(filtered_image), 255) 47 | images.append(filtered_image) 48 | return images 49 | 50 | def show_images(images, rgb=True): 51 | gs = gridspec.GridSpec(1, len(images)) 52 | for i, image in enumerate(images): 53 | plt.subplot(gs[0, i]) 54 | if rgb: 55 | plt.imshow(image) 56 | else: 57 | image = image.reshape(image.shape[0], image.shape[1]) 58 | plt.imshow(image, cmap='gray') 59 | plt.axis('off') 60 | plt.show() 61 | 62 | def main(): 63 | rgb = False 64 | if rgb: 65 | kernels_list = [kernels.BLUR_FILTER_RGB, 66 | kernels.SHARPEN_FILTER_RGB, 67 | kernels.EDGE_FILTER_RGB, 68 | kernels.TOP_SOBEL_RGB, 69 | kernels.EMBOSS_FILTER_RGB] 70 | else: 71 | kernels_list = [kernels.BLUR_FILTER, 72 | kernels.SHARPEN_FILTER, 73 | kernels.EDGE_FILTER, 74 | kernels.TOP_SOBEL, 75 | kernels.EMBOSS_FILTER] 76 | 77 | kernels_list = kernels_list[1:] 78 | image = read_one_image('data/friday.jpg') 79 | if not rgb: 80 | image = tf.image.rgb_to_grayscale(image) 81 | image = tf.expand_dims(image, 0) # make it into a batch of 1 element 82 | images = convolve(image, kernels_list, rgb) 83 | with tf.Session() as sess: 84 | images = sess.run(images) # convert images from tensors to float values 85 | show_images(images, rgb) 86 | 87 | if __name__ == '__main__': 88 | main() -------------------------------------------------------------------------------- /Tensorflow/standford_example/kernels.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | a = np.zeros([3, 3, 3, 3]) 5 | a[1, 1, :, :] = 0.25 6 | a[0, 1, :, :] = 0.125 7 | a[1, 0, :, :] = 0.125 8 | a[2, 1, :, :] = 0.125 9 | a[1, 2, :, :] = 0.125 10 | a[0, 0, :, :] = 0.0625 11 | a[0, 2, :, :] = 0.0625 12 | a[2, 0, :, :] = 0.0625 13 | a[2, 2, :, :] = 0.0625 14 | 15 | BLUR_FILTER_RGB = tf.constant(a, dtype=tf.float32) 16 | 17 | a = np.zeros([3, 3, 1, 1]) 18 | # a[1, 1, :, :] = 0.25 19 | # a[0, 1, :, :] = 0.125 20 | # a[1, 0, :, :] = 0.125 21 | # a[2, 1, :, :] = 0.125 22 | # a[1, 2, :, :] = 0.125 23 | # a[0, 0, :, :] = 0.0625 24 | # a[0, 2, :, :] = 0.0625 25 | # a[2, 0, :, :] = 0.0625 26 | # a[2, 2, :, :] = 0.0625 27 | a[1, 1, :, :] = 1.0 28 | a[0, 1, :, :] = 1.0 29 | a[1, 0, :, :] = 1.0 30 | a[2, 1, :, :] = 1.0 31 | a[1, 2, :, :] = 1.0 32 | a[0, 0, :, :] = 1.0 33 | a[0, 2, :, :] = 1.0 34 | a[2, 0, :, :] = 1.0 35 | a[2, 2, :, :] = 1.0 36 | BLUR_FILTER = tf.constant(a, dtype=tf.float32) 37 | 38 | a = np.zeros([3, 3, 3, 3]) 39 | a[1, 1, :, :] = 5 40 | a[0, 1, :, :] = -1 41 | a[1, 0, :, :] = -1 42 | a[2, 1, :, :] = -1 43 | a[1, 2, :, :] = -1 44 | 45 | SHARPEN_FILTER_RGB = tf.constant(a, dtype=tf.float32) 46 | 47 | a = np.zeros([3, 3, 1, 1]) 48 | a[1, 1, :, :] = 5 49 | a[0, 1, :, :] = -1 50 | a[1, 0, :, :] = -1 51 | a[2, 1, :, :] = -1 52 | a[1, 2, :, :] = -1 53 | 54 | SHARPEN_FILTER = tf.constant(a, dtype=tf.float32) 55 | 56 | # a = np.zeros([3, 3, 3, 3]) 57 | # a[:, :, :, :] = -1 58 | # a[1, 1, :, :] = 8 59 | 60 | # EDGE_FILTER_RGB = tf.constant(a, dtype=tf.float32) 61 | 62 | EDGE_FILTER_RGB = 
tf.constant([ 63 | [[[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]], 64 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]], 65 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]], 66 | [[[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]], 67 | [[ 8., 0., 0.], [ 0., 8., 0.], [ 0., 0., 8.]], 68 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]], 69 | [[[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]], 70 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]], 71 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]] 72 | ]) 73 | 74 | a = np.zeros([3, 3, 1, 1]) 75 | # a[:, :, :, :] = -1 76 | # a[1, 1, :, :] = 8 77 | a[0, 1, :, :] = -1 78 | a[1, 0, :, :] = -1 79 | a[1, 2, :, :] = -1 80 | a[2, 1, :, :] = -1 81 | a[1, 1, :, :] = 4 82 | 83 | EDGE_FILTER = tf.constant(a, dtype=tf.float32) 84 | 85 | a = np.zeros([3, 3, 3, 3]) 86 | a[0, :, :, :] = 1 87 | a[0, 1, :, :] = 2 # originally 2 88 | a[2, :, :, :] = -1 89 | a[2, 1, :, :] = -2 90 | 91 | TOP_SOBEL_RGB = tf.constant(a, dtype=tf.float32) 92 | 93 | a = np.zeros([3, 3, 1, 1]) 94 | a[0, :, :, :] = 1 95 | a[0, 1, :, :] = 2 # originally 2 96 | a[2, :, :, :] = -1 97 | a[2, 1, :, :] = -2 98 | 99 | TOP_SOBEL = tf.constant(a, dtype=tf.float32) 100 | 101 | a = np.zeros([3, 3, 3, 3]) 102 | a[0, 0, :, :] = -2 103 | a[0, 1, :, :] = -1 104 | a[1, 0, :, :] = -1 105 | a[1, 1, :, :] = 1 106 | a[1, 2, :, :] = 1 107 | a[2, 1, :, :] = 1 108 | a[2, 2, :, :] = 2 109 | 110 | EMBOSS_FILTER_RGB = tf.constant(a, dtype=tf.float32) 111 | 112 | a = np.zeros([3, 3, 1, 1]) 113 | a[0, 0, :, :] = -2 114 | a[0, 1, :, :] = -1 115 | a[1, 0, :, :] = -1 116 | a[1, 1, :, :] = 1 117 | a[1, 2, :, :] = 1 118 | a[2, 1, :, :] = 1 119 | a[2, 2, :, :] = 2 120 | EMBOSS_FILTER = tf.constant(a, dtype=tf.float32) -------------------------------------------------------------------------------- /Tensorflow/standford_example/word2vec_utils.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import random 3 | import os 4 | import sys 5 | sys.path.append('..') 6 | import zipfile 7 | 8 | import numpy as np 9 | from six.moves import urllib 10 | import tensorflow as tf 11 | 12 | import utils 13 | 14 | def read_data(file_path): 15 | """ Read data into a list of tokens 16 | There should be 17,005,207 tokens 17 | """ 18 | with zipfile.ZipFile(file_path) as f: 19 | words = tf.compat.as_str(f.read(f.namelist()[0])).split() 20 | return words 21 | 22 | def build_vocab(words, vocab_size, visual_fld): 23 | """ Build vocabulary of VOCAB_SIZE most frequent words and write it to 24 | visualization/vocab.tsv 25 | """ 26 | utils.safe_mkdir(visual_fld) 27 | file = open(os.path.join(visual_fld, 'vocab.tsv'), 'w') 28 | 29 | dictionary = dict() 30 | count = [('UNK', -1)] 31 | index = 0 32 | count.extend(Counter(words).most_common(vocab_size - 1)) 33 | 34 | for word, _ in count: 35 | dictionary[word] = index 36 | index += 1 37 | file.write(word + '\n') 38 | 39 | index_dictionary = dict(zip(dictionary.values(), dictionary.keys())) 40 | file.close() 41 | return dictionary, index_dictionary 42 | 43 | def convert_words_to_index(words, dictionary): 44 | """ Replace each word in the dataset with its index in the dictionary """ 45 | return [dictionary[word] if word in dictionary else 0 for word in words] 46 | 47 | def generate_sample(index_words, context_window_size): 48 | """ Form training pairs according to the skip-gram model.
""" 49 | for index, center in enumerate(index_words): 50 | context = random.randint(1, context_window_size) 51 | # get a random target before the center word 52 | for target in index_words[max(0, index - context): index]: 53 | yield center, target 54 | # get a random target after the center wrod 55 | for target in index_words[index + 1: index + context + 1]: 56 | yield center, target 57 | 58 | def most_common_words(visual_fld, num_visualize): 59 | """ create a list of num_visualize most frequent words to visualize on TensorBoard. 60 | saved to visualization/vocab_[num_visualize].tsv 61 | """ 62 | words = open(os.path.join(visual_fld, 'vocab.tsv'), 'r').readlines()[:num_visualize] 63 | words = [word for word in words] 64 | file = open(os.path.join(visual_fld, 'vocab_' + str(num_visualize) + '.tsv'), 'w') 65 | for word in words: 66 | file.write(word) 67 | file.close() 68 | 69 | def batch_gen(download_url, expected_byte, vocab_size, batch_size, 70 | skip_window, visual_fld): 71 | local_dest = 'data/text8.zip' 72 | utils.download_one_file(download_url, local_dest, expected_byte) 73 | words = read_data(local_dest) 74 | dictionary, _ = build_vocab(words, vocab_size, visual_fld) 75 | index_words = convert_words_to_index(words, dictionary) 76 | del words # to save memory 77 | single_gen = generate_sample(index_words, skip_window) 78 | 79 | while True: 80 | center_batch = np.zeros(batch_size, dtype=np.int32) 81 | target_batch = np.zeros([batch_size, 1]) 82 | for index in range(batch_size): 83 | center_batch[index], target_batch[index] = next(single_gen) 84 | yield center_batch, target_batch -------------------------------------------------------------------------------- /Text_Classification/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Text_Classification/.DS_Store -------------------------------------------------------------------------------- /Text_Classification/.ipynb_checkpoints/cnn_textclassification_keras-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 6.3 \n", 8 | "\n", 9 | "https://github.com/jarfo/kchar\n", 10 | "https://github.com/carpedm20/lstm-char-cnn-tensorflow\n", 11 | "https://github.com/fchollet/keras/blob/master/examples/imdb_cnn.py" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [ 21 | { 22 | "name": "stderr", 23 | "output_type": "stream", 24 | "text": [ 25 | "Using TensorFlow backend.\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "from keras.preprocessing import sequence\n", 31 | "from keras.models import Sequential\n", 32 | "from keras.layers import Dense, Dropout, Activation\n", 33 | "from keras.layers import Embedding\n", 34 | "from keras.layers import Conv1D, GlobalMaxPooling1D\n", 35 | "from keras.datasets import imdb" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "# set parameters:\n", 47 | "max_features = 5000\n", 48 | "maxlen = 400\n", 49 | "batch_size = 32\n", 50 | "embedding_dims = 50\n", 51 | "filters = 250\n", 52 | "kernel_size = 3\n", 53 | "hidden_dims = 250\n", 54 | "epochs = 2" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | 
"metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "Loading data...\n", 69 | "25000 train sequences\n", 70 | "25000 test sequences\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "print('Loading data...')\n", 76 | "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)\n", 77 | "print(len(x_train), 'train sequences')\n", 78 | "print(len(x_test), 'test sequences')" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 4, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "Pad sequences (samples x time)\n", 93 | "x_train shape: (25000, 400)\n", 94 | "x_test shape: (25000, 400)\n", 95 | "Build model...\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "print('Pad sequences (samples x time)')\n", 101 | "x_train = sequence.pad_sequences(x_train, maxlen=maxlen)\n", 102 | "x_test = sequence.pad_sequences(x_test, maxlen=maxlen)\n", 103 | "print('x_train shape:', x_train.shape)\n", 104 | "print('x_test shape:', x_test.shape)\n", 105 | "print('Build model...')\n" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 8, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "model = Sequential()\n", 117 | "\n", 118 | "# we start off with an efficient embedding layer which maps\n", 119 | "# our vocab indices into embedding_dims dimensions\n", 120 | "model.add(Embedding(max_features,\n", 121 | " embedding_dims,\n", 122 | " input_length=maxlen))\n", 123 | "model.add(Dropout(0.2))" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": true 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "# we add a Convolution1D, which will learn filters\n", 135 | "# word group filters of size filter_length:\n", 136 | "model.add(Conv1D(filters,\n", 137 | " kernel_size,\n", 138 | " padding='valid',\n", 139 | " activation='relu',\n", 140 | " strides=1))\n", 141 | "# we use max pooling:\n", 142 | "model.add(GlobalMaxPooling1D())\n", 143 | "\n", 144 | "# We add a vanilla hidden layer:\n", 145 | "model.add(Dense(hidden_dims))\n", 146 | "model.add(Dropout(0.2))\n", 147 | "model.add(Activation('relu'))\n", 148 | "\n", 149 | "# We project onto a single unit output layer, and squash it with a sigmoid:\n", 150 | "model.add(Dense(1))\n", 151 | "model.add(Activation('sigmoid'))\n", 152 | "\n", 153 | "model.compile(loss='binary_crossentropy',\n", 154 | " optimizer='adam',\n", 155 | " metrics=['accuracy'])\n", 156 | "model.fit(x_train, y_train,\n", 157 | " batch_size=batch_size,\n", 158 | " epochs=epochs,\n", 159 | " validation_data=(x_test, y_test))" 160 | ] 161 | } 162 | ], 163 | "metadata": { 164 | "anaconda-cloud": {}, 165 | "kernelspec": { 166 | "display_name": "Python [conda root]", 167 | "language": "python", 168 | "name": "conda-root-py" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | "version": 3 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": "python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython3", 180 | "version": "3.5.2" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 1 185 | } 186 | -------------------------------------------------------------------------------- /Text_Classification/.ipynb_checkpoints/seq2seq_keras-checkpoint.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /Text_Classification/To_Do/cnn_keras_text_class_kor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 6 15:08:54 2017 5 | 6 | #Data 7 | Korean movie review data: https://github.com/e9t/nsmc 8 | 9 | @author: ryan 10 | 11 | In Progress 12 | """ -------------------------------------------------------------------------------- /Text_Classification/To_Do/hierachical_attention_keras_text_class_eng.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 6 15:08:54 2017 5 | 6 | #Data 7 | 8 | @author: ryan 9 | 10 | In Progress 11 | """ 12 | -------------------------------------------------------------------------------- /Text_Classification/To_Do/lstm_keras_text_class_eng.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 6 15:08:54 2017 5 | 6 | #Data 7 | 8 | @author: ryan 9 | 10 | In Progress 11 | """ 12 | -------------------------------------------------------------------------------- /Text_Classification/To_Do/rnn_attention_keras_text_class_eng .py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 6 15:08:54 2017 5 | 6 | #Data 7 | 8 | @author: ryan 9 | 10 | In Progress 11 | """ 12 | -------------------------------------------------------------------------------- /Text_Classification/cnn_keras_text_class_imdb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat May 6 22:55:55 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import pickle 12 | from collections import defaultdict 13 | import re 14 | 15 | from bs4 import BeautifulSoup 16 | 17 | import sys 18 | import os 19 | 20 | from keras.preprocessing import sequence 21 | from keras.models import Sequential 22 | from keras.layers import Dense, Dropout, Activation 23 | from keras.layers import Embedding 24 | from keras.layers import Conv1D, GlobalMaxPooling1D 25 | from keras.datasets import imdb 26 | 27 | 28 | # set parameters: 29 | max_features = 5000 30 | maxlen = 400 31 | batch_size = 32 32 | embedding_dims = 50 33 | filters = 250 34 | kernel_size = 3 35 | hidden_dims = 250 36 | epochs = 2 37 | 38 | print('Loading data...') 39 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 40 | print(len(x_train), 'train sequences') 41 | print(len(x_test), 'test sequences') 42 | 43 | print('Pad sequences (samples x time)') 44 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 45 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 46 | print('x_train shape:', x_train.shape) 47 | print('x_test shape:', x_test.shape) 48 | 49 | print('Build model...') 50 | model = Sequential() 51 | 52 | # we start off with an efficient embedding layer which maps 53 | # our vocab indices into embedding_dims dimensions 54 | model.add(Embedding(max_features, 55 | embedding_dims, 56 |
input_length=maxlen)) 57 | model.add(Dropout(0.2)) 58 | 59 | # we add a Convolution1D, which will learn filters 60 | # word group filters of size filter_length: 61 | model.add(Conv1D(filters, 62 | kernel_size, 63 | padding='valid', 64 | activation='relu', 65 | strides=1)) 66 | # we use max pooling: 67 | model.add(GlobalMaxPooling1D()) 68 | 69 | # We add a vanilla hidden layer: 70 | model.add(Dense(hidden_dims)) 71 | model.add(Dropout(0.2)) 72 | model.add(Activation('relu')) 73 | 74 | # We project onto a single unit output layer, and squash it with a sigmoid: 75 | model.add(Dense(1)) 76 | model.add(Activation('sigmoid')) 77 | 78 | model.compile(loss='binary_crossentropy', 79 | optimizer='adam', 80 | metrics=['accuracy']) 81 | history = model.fit(x_train, y_train, 82 | batch_size=batch_size, 83 | epochs=epochs, 84 | validation_data=(x_test, y_test)) 85 | 86 | # list all data in history 87 | print(history.history.keys()) 88 | 89 | # summarize history for accuracy 90 | import matplotlib.pyplot as plt 91 | 92 | plt.plot(history.history['acc']) 93 | plt.plot(history.history['val_acc']) 94 | plt.title('model accuracy') 95 | plt.ylabel('accuracy') 96 | plt.xlabel('epoch') 97 | plt.legend(['train', 'test'], loc='upper left') 98 | plt.show() 99 | # summarize history for loss 100 | plt.plot(history.history['loss']) 101 | plt.plot(history.history['val_loss']) 102 | plt.title('model loss') 103 | plt.ylabel('loss') 104 | plt.xlabel('epoch') 105 | plt.legend(['train', 'test'], loc='upper left') 106 | plt.show() -------------------------------------------------------------------------------- /Text_Classification/cnn_keras_text_class_imdb2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Jun 3 18:51:43 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | '''This example demonstrates the use of Convolution1D for text classification. 10 | Gets to 0.89 test accuracy after 2 epochs. 11 | 90s/epoch on Intel i5 2.4Ghz CPU. 12 | 10s/epoch on Tesla K40 GPU. 
13 | ''' 14 | 15 | from __future__ import print_function 16 | 17 | from keras.preprocessing import sequence 18 | from keras.models import Sequential 19 | from keras.layers import Dense, Dropout, Activation 20 | from keras.layers import Embedding 21 | from keras.layers import Conv1D, GlobalMaxPooling1D 22 | from keras.datasets import imdb 23 | 24 | # set parameters: 25 | max_features = 5000 26 | maxlen = 400 27 | batch_size = 32 28 | embedding_dims = 50 29 | filters = 250 30 | kernel_size = 3 31 | hidden_dims = 250 32 | epochs = 2 33 | 34 | print('Loading data...') 35 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 36 | print(len(x_train), 'train sequences') 37 | print(len(x_test), 'test sequences') 38 | 39 | print('Pad sequences (samples x time)') 40 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 41 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 42 | print('x_train shape:', x_train.shape) 43 | print('x_test shape:', x_test.shape) 44 | 45 | print('Build model...') 46 | model = Sequential() 47 | 48 | # we start off with an efficient embedding layer which maps 49 | # our vocab indices into embedding_dims dimensions 50 | model.add(Embedding(max_features, 51 | embedding_dims, 52 | input_length=maxlen)) 53 | model.add(Dropout(0.2)) 54 | 55 | # we add a Convolution1D, which will learn filters 56 | # word group filters of size filter_length: 57 | model.add(Conv1D(filters, 58 | kernel_size, 59 | padding='valid', 60 | activation='relu', 61 | strides=1)) 62 | # we use max pooling: 63 | model.add(GlobalMaxPooling1D()) 64 | 65 | # We add a vanilla hidden layer: 66 | model.add(Dense(hidden_dims)) 67 | model.add(Dropout(0.2)) 68 | model.add(Activation('relu')) 69 | 70 | # We project onto a single unit output layer, and squash it with a sigmoid: 71 | model.add(Dense(1)) 72 | model.add(Activation('sigmoid')) 73 | 74 | model.compile(loss='binary_crossentropy', 75 | optimizer='adam', 76 | metrics=['accuracy']) 77 | model.fit(x_train, y_train, 78 | batch_size=batch_size, 79 | epochs=epochs, 80 | validation_data=(x_test, y_test)) -------------------------------------------------------------------------------- /Text_Classification/nets/__pycache__/text_cnn.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Text_Classification/nets/__pycache__/text_cnn.cpython-35.pyc -------------------------------------------------------------------------------- /Text_Classification/nets/text_cnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from collections import OrderedDict 4 | 5 | def textcnn(input_placeholder, target_placeholder, vocab_size, embedding_dim, filter_sizes, num_filters, is_training=True, keep_prob=0.8, scope='TextCNN'): 6 | 7 | # Get 'sequence_length' and 'num_classes' 8 | sequence_length = input_placeholder.get_shape()[1] 9 | num_classes = target_placeholder.get_shape()[1] 10 | 11 | # Declare 'end_points' which is an ordered dictionary 12 | end_points = OrderedDict() 13 | 14 | # tf.random_uniform_initializer의 형태를 간소화 15 | random_uniform = lambda minval, maxval: tf.random_uniform_initializer(minval=minval, maxval=maxval) 16 | 17 | # tf.truncated_normal_initializer의 형태를 간소화 18 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(mean=0.0, stddev=stddev) 19 | 20 | # tf.contrib.layers.xavier_initializer의 형태를 간소화 21 | 
xavier = tf.contrib.layers.xavier_initializer() 22 | 23 | # tf.contrib.layers.xavier_initializer_conv2d의 형태를 간소화 24 | xavier_conv = tf.contrib.layers.xavier_initializer_conv2d() 25 | 26 | # tf.constant_initializer의 형태를 간소화 27 | constant = lambda value: tf.constant_initializer(value=value) 28 | 29 | with tf.variable_scope(scope): 30 | 31 | end_point = 'Embedding' 32 | with tf.variable_scope(end_point): 33 | w_embedding = tf.get_variable(name='w_embedding', shape=[vocab_size, embedding_dim], 34 | initializer=random_uniform(-1.0, 1.0)) 35 | embedded_chars = tf.nn.embedding_lookup(params=w_embedding, ids=input_placeholder, name='embedded_chars') 36 | embedded_chars_expanded = tf.expand_dims(input=embedded_chars, axis=-1, name='embedded_chars_expanded') 37 | end_points[end_point] = w_embedding 38 | 39 | pooled_output = [] 40 | for i, filter_size in enumerate(filter_sizes): 41 | end_point = 'Conv-maxpool-%d' % filter_size 42 | with tf.variable_scope(end_point): 43 | filter_shape = [filter_size, embedding_dim, 1, num_filters] 44 | bias_shape = [num_filters] 45 | w_conv = tf.get_variable(name='w_conv', shape=filter_shape, initializer=trunc_normal(0.01)) 46 | b_conv = tf.get_variable(name='b_conv', shape=bias_shape, initializer=constant(0.0)) 47 | conv = tf.nn.conv2d(input=embedded_chars_expanded, filter=w_conv, strides=[1, 1, 1, 1], padding='VALID', name='conv') 48 | activated = tf.nn.relu(features=tf.nn.bias_add(conv, b_conv), name='relu') 49 | pooled = tf.nn.max_pool(value=activated, ksize=[1, sequence_length - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding='VALID', name='maxpool') 50 | pooled_output.append(pooled) 51 | end_points[end_point] = pooled 52 | 53 | end_point = 'Flatten' 54 | with tf.variable_scope(end_point): 55 | num_filters_total = num_filters * len(filter_sizes) 56 | h_pool = tf.concat(values=pooled_output, axis=3, name='concat') 57 | h_pool_flat = tf.reshape(tensor=h_pool, shape=[-1, num_filters_total], name='flatten') 58 | end_points[end_point] = h_pool_flat 59 | 60 | end_point = 'Fully-connected' 61 | with tf.variable_scope(end_point): 62 | dropout = tf.contrib.slim.dropout(h_pool_flat, keep_prob=keep_prob, is_training=is_training, scope='dropout') 63 | w_fc = tf.get_variable(name='w_fc', shape=[num_filters_total, num_classes], initializer=xavier) 64 | b_fc = tf.get_variable(name='b_fc', shape=[num_classes], initializer=constant(0.0)) 65 | logits = tf.nn.xw_plus_b(x=dropout, weights=w_fc, biases=b_fc, name='logits') 66 | end_points[end_point] = logits 67 | 68 | return logits, end_points -------------------------------------------------------------------------------- /Text_Classification/seq2seq_keras.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [] 11 | } 12 | ], 13 | "metadata": { 14 | "anaconda-cloud": {}, 15 | "kernelspec": { 16 | "display_name": "Python [conda root]", 17 | "language": "python", 18 | "name": "conda-root-py" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.5.2" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 1 35 | } 36 | -------------------------------------------------------------------------------- 
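# Usage sketch for Text_Classification/nets/text_cnn.py above: the module only defines
# textcnn() and never shows how it is wired into a graph. This is a minimal, hypothetical
# driver (TF 1.x placeholder style, assumed to run from the Text_Classification/ directory);
# the sequence length, class count, vocabulary size, and hyperparameter values below are
# illustrative assumptions, not values taken from the repo.
import tensorflow as tf
from nets.text_cnn import textcnn

SEQ_LEN, NUM_CLASSES, VOCAB_SIZE = 400, 2, 5000   # assumed sizes for illustration

inputs = tf.placeholder(tf.int32, [None, SEQ_LEN], name='inputs')           # word-id sequences
targets = tf.placeholder(tf.float32, [None, NUM_CLASSES], name='targets')   # one-hot labels

logits, end_points = textcnn(inputs, targets,
                             vocab_size=VOCAB_SIZE, embedding_dim=128,
                             filter_sizes=[3, 4, 5], num_filters=100,
                             is_training=True, keep_prob=0.8)

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=logits))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)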
/VAE/vae_sample.py: -------------------------------------------------------------------------------- 1 | """ 2 | Arxiv Insights: https://www.youtube.com/watch?v=9zKuYvjFFS8&t=609s 3 | https://github.com/hwalsuklee/tensorflow-mnist-CVAE 4 | """ 5 | 6 | import tensorflow as tf 7 | 8 | # Gaussian MLP as conditional encoder 9 | def gaussian_MLP_conditional_encoder(x, y, n_hidden, n_output, keep_prob): 10 | with tf.variable_scope("gaussian_MLP_encoder"): 11 | # concatenate condition and image 12 | dim_y = int(y.get_shape()[1]) 13 | input = tf.concat(axis=1, values=[x, y]) 14 | 15 | # initializers 16 | w_init = tf.contrib.layers.variance_scaling_initializer() 17 | b_init = tf.constant_initializer(0.) 18 | 19 | # 1st hidden layer 20 | w0 = tf.get_variable('w0', [input.get_shape()[1], n_hidden+dim_y], initializer=w_init) 21 | b0 = tf.get_variable('b0', [n_hidden+dim_y], initializer=b_init) 22 | h0 = tf.matmul(input, w0) + b0 23 | h0 = tf.nn.elu(h0) 24 | h0 = tf.nn.dropout(h0, keep_prob) 25 | 26 | # 2nd hidden layer 27 | w1 = tf.get_variable('w1', [h0.get_shape()[1], n_hidden], initializer=w_init) 28 | b1 = tf.get_variable('b1', [n_hidden], initializer=b_init) 29 | h1 = tf.matmul(h0, w1) + b1 30 | h1 = tf.nn.tanh(h1) 31 | h1 = tf.nn.dropout(h1, keep_prob) 32 | 33 | # output layer 34 | # borrowed from https: // github.com / altosaar / vae / blob / master / vae.py 35 | wo = tf.get_variable('wo', [h1.get_shape()[1], n_output * 2], initializer=w_init) 36 | bo = tf.get_variable('bo', [n_output * 2], initializer=b_init) 37 | 38 | gaussian_params = tf.matmul(h1, wo) + bo 39 | 40 | # The mean parameter is unconstrained 41 | mean = gaussian_params[:, :n_output] 42 | # The standard deviation must be positive. Parametrize with a softplus and 43 | # add a small epsilon for numerical stability 44 | stddev = 1e-6 + tf.nn.softplus(gaussian_params[:, n_output:]) 45 | 46 | return mean, stddev 47 | 48 | # Bernoulli MLP as conditional decoder 49 | def bernoulli_MLP_conditional_decoder(z, y, n_hidden, n_output, keep_prob, reuse=False): 50 | 51 | with tf.variable_scope("bernoulli_MLP_decoder", reuse=reuse): 52 | # concatenate condition and latent vectors 53 | input = tf.concat(axis=1, values=[z, y]) 54 | 55 | # initializers 56 | w_init = tf.contrib.layers.variance_scaling_initializer() 57 | b_init = tf.constant_initializer(0.) 
58 | 59 | # 1st hidden layer 60 | w0 = tf.get_variable('w0', [input.get_shape()[1], n_hidden], initializer=w_init) 61 | b0 = tf.get_variable('b0', [n_hidden], initializer=b_init) 62 | h0 = tf.matmul(input, w0) + b0 63 | h0 = tf.nn.tanh(h0) 64 | h0 = tf.nn.dropout(h0, keep_prob) 65 | 66 | # 2nd hidden layer 67 | w1 = tf.get_variable('w1', [h0.get_shape()[1], n_hidden], initializer=w_init) 68 | b1 = tf.get_variable('b1', [n_hidden], initializer=b_init) 69 | h1 = tf.matmul(h0, w1) + b1 70 | h1 = tf.nn.elu(h1) 71 | h1 = tf.nn.dropout(h1, keep_prob) 72 | 73 | # output layer-mean 74 | wo = tf.get_variable('wo', [h1.get_shape()[1], n_output], initializer=w_init) 75 | bo = tf.get_variable('bo', [n_output], initializer=b_init) 76 | y = tf.sigmoid(tf.matmul(h1, wo) + bo) 77 | 78 | return y 79 | 80 | # Gateway 81 | def autoencoder(x_hat, x, y, dim_img, dim_z, n_hidden, keep_prob): 82 | 83 | # encoding 84 | mu, sigma = gaussian_MLP_conditional_encoder(x_hat, y, n_hidden, dim_z, keep_prob) 85 | 86 | # sampling by re-parameterization technique 87 | z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32) #Latent vector 88 | 89 | # decoding 90 | x_ = bernoulli_MLP_conditional_decoder(z, y, n_hidden, dim_img, keep_prob) 91 | x_ = tf.clip_by_value(x_, 1e-8, 1 - 1e-8) 92 | 93 | # ELBO 94 | marginal_likelihood = tf.reduce_sum(x * tf.log(x_) + (1 - x) * tf.log(1 - x_), 1) 95 | KL_divergence = 0.5 * tf.reduce_sum(tf.square(mu) + tf.square(sigma) - tf.log(1e-8 + tf.square(sigma)) - 1, 1) 96 | 97 | marginal_likelihood = tf.reduce_mean(marginal_likelihood) 98 | KL_divergence = tf.reduce_mean(KL_divergence) 99 | 100 | ELBO = marginal_likelihood - KL_divergence 101 | 102 | # minimize loss instead of maximizing ELBO 103 | loss = -ELBO 104 | 105 | return x_, z, loss, -marginal_likelihood, KL_divergence 106 | 107 | # Conditional Decoder (Generator) 108 | def decoder(z, y, dim_img, n_hidden): 109 | 110 | x_ = bernoulli_MLP_conditional_decoder(z, y, n_hidden, dim_img, 1.0, reuse=True) 111 | 112 | return x_ -------------------------------------------------------------------------------- /dataset/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/dataset/.DS_Store -------------------------------------------------------------------------------- /pytorch_basic/.ipynb_checkpoints/10.pytorch_rnn-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /pytorch_basic/.ipynb_checkpoints/sec 6. 
Linear regression wih Python-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "ename": "NameError", 12 | "evalue": "name 'np' is not defined", 13 | "output_type": "error", 14 | "traceback": [ 15 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 16 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 17 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 18 | "\u001b[0;31mNameError\u001b[0m: name 'np' is not defined" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "np.random.seed(1)\n", 24 | "\n", 25 | "x= np.random.rand(n)\n", 26 | "y = x ** np.random.rand(n)\n", 27 | "\n", 28 | "colors = np.random.rand()\n", 29 | "plt.plot(np.unique(x), np.poly1d(np.ployfit(x,y,1))(np.unique(x)))\n", 30 | "\n", 31 | "plt.scatter(x,y, colors, alpha=0.5)\n", 32 | "plt.show()" 33 | ] 34 | } 35 | ], 36 | "metadata": { 37 | "anaconda-cloud": {}, 38 | "kernelspec": { 39 | "display_name": "Python [conda root]", 40 | "language": "python", 41 | "name": "conda-root-py" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.5.2" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 1 58 | } 59 | -------------------------------------------------------------------------------- /pytorch_basic/10.pytorch_rnn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [] 11 | } 12 | ], 13 | "metadata": { 14 | "anaconda-cloud": {}, 15 | "kernelspec": { 16 | "display_name": "Python [conda root]", 17 | "language": "python", 18 | "name": "conda-root-py" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.5.2" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 1 35 | } 36 | -------------------------------------------------------------------------------- 
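# Corrected, self-contained sketch of the plotting cell in the "sec 6. Linear regression
# wih Python" notebooks (checkpoint above, full notebook further below): the original cells
# fail with NameErrors (np is never imported in the checkpoint, n is never defined in the
# saved notebook), omit the matplotlib import, and misspell np.polyfit as np.ployfit.
# n = 50 here is an arbitrary illustrative choice, not a value from the repo.
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(1)
n = 50
x = np.random.rand(n)
y = x ** np.random.rand(n)

# fit a degree-1 polynomial and draw the fitted line over the scatter
coeffs = np.polyfit(x, y, 1)
xs = np.unique(x)
plt.plot(xs, np.poly1d(coeffs)(xs))
plt.scatter(x, y, alpha=0.5)
plt.show()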
/pytorch_basic/Start_Pytorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from torch.autograd import Variable\n", 10 | "import torch" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 4, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "torch.add?" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [] 28 | } 29 | ], 30 | "metadata": { 31 | "kernelspec": { 32 | "display_name": "Python 3", 33 | "language": "python", 34 | "name": "python3" 35 | }, 36 | "language_info": { 37 | "codemirror_mode": { 38 | "name": "ipython", 39 | "version": 3 40 | }, 41 | "file_extension": ".py", 42 | "mimetype": "text/x-python", 43 | "name": "python", 44 | "nbconvert_exporter": "python", 45 | "pygments_lexer": "ipython3", 46 | "version": "3.6.3" 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 2 51 | } 52 | -------------------------------------------------------------------------------- /pytorch_basic/cnn_cifar10_pytorch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Jun 10 15:48:47 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | #-----------CNN------------------# 10 | import torch 11 | import torchvision 12 | import torchvision.transforms as transforms 13 | 14 | transform = transforms.Compose( 15 | [transforms.ToTensor(), 16 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 17 | 18 | trainset = torchvision.datasets.CIFAR10(root='./data', train=True, 19 | download=True, transform=transform) 20 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, 21 | shuffle=True, num_workers=2) 22 | 23 | testset = torchvision.datasets.CIFAR10(root='./data', train=False, 24 | download=True, transform=transform) 25 | testloader = torch.utils.data.DataLoader(testset, batch_size=4, 26 | shuffle=False, num_workers=2) 27 | 28 | classes = ('plane', 'car', 'bird', 'cat', 29 | 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') 30 | 31 | import matplotlib.pyplot as plt 32 | import numpy as np 33 | 34 | # functions to show an image 35 | 36 | def imshow(img): 37 | img = img / 2 + 0.5 # unnormalize 38 | npimg = img.numpy() 39 | plt.imshow(np.transpose(npimg, (1, 2, 0))) 40 | 41 | 42 | # get some random training images 43 | dataiter = iter(trainloader) 44 | images, labels = dataiter.next() 45 | 46 | # show images 47 | imshow(torchvision.utils.make_grid(images)) 48 | # print labels 49 | print(' '.join('%5s' % classes[labels[j]] for j in range(4))) 50 | 51 | #1. Loading and normalizing cifar10 52 | 53 | #2. 
Define a Convolution Neural Network 54 | from torch.autograd import Variable 55 | import torch.nn as nn 56 | import torch.nn.functional as F 57 | 58 | class Net(nn.Module): 59 | def __init__(self): 60 | super(Net, self).__init__() 61 | self.conv1 = nn.Conv2d(3, 6, 5) 62 | self.pool = nn.MaxPool2d(2,2) 63 | self.conv2 = nn.Conv2d(6, 16, 5) 64 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 65 | self.fc2 = nn.Linear(120, 84) 66 | self.fc3 = nn.Linear(84, 10) 67 | 68 | def forward(self, x): 69 | x = self.pool(F.relu(self.conv1(x))) 70 | x = self.pool(F.relu(self.conv2(x))) 71 | x = x.view(-1, 16 * 5 * 5) 72 | x = F.relu(self.fc1(x)) 73 | x = F.relu(self.fc2(x)) 74 | x = self.fc3(x) 75 | return x 76 | 77 | net = Net() 78 | 79 | #3. Define a Loss function and optimizer 80 | 81 | #Cross-Entropy and SGD with momentum 82 | import torch.optim as optim 83 | 84 | criterion = nn.CrossEntropyLoss() 85 | optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) 86 | 87 | #4. train the network 88 | net.cuda() 89 | 90 | for epoch in range(1000): # loop over the dataset multiple times 91 | 92 | running_loss = 0.0 93 | for i, data in enumerate(trainloader, 0): 94 | # get the inputs 95 | inputs, labels = data 96 | 97 | # wrap them in Variable 98 | #inputs, labels = Variable(inputs), Variable(labels) #CPU Ver 99 | inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda()) #GPU Ver 100 | 101 | # zero the parameter gradients 102 | optimizer.zero_grad() 103 | 104 | # forward + backward + optimize 105 | outputs = net(inputs) 106 | loss = criterion(outputs, labels) 107 | loss.backward() 108 | optimizer.step() 109 | 110 | # print statistics 111 | running_loss += loss.data[0] 112 | if i % 2000 == 1999: # print every 2000 mini-batches 113 | print('[%d, %5d] loss: %.3f' % 114 | (epoch + 1, i + 1, running_loss / 2000)) 115 | running_loss = 0.0 116 | 117 | print('Finished Training') 118 | 119 | #5. Test the network on the test data 120 | dataiter = iter(testloader) 121 | images, labels = dataiter.next() 122 | #print image 123 | imshow(torchvision.utils.make_grid(images)) 124 | print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4))) 125 | 126 | outputs = net(Variable(images)) 127 | 128 | _, predicted = torch.max(outputs.data, 1) 129 | print('Predicted: ', ' '.join('%5s' % classes[predicted[j][0]] 130 | for j in range(4))) 131 | 132 | #Performance Test 133 | correct = 0 134 | total = 0 135 | for data in testloader: 136 | images, labels = data 137 | outputs = net(Variable(images)) 138 | _, predicted = torch.max(outputs.data, 1) 139 | total += labels.size(0) 140 | correct += (predicted == labels).sum() 141 | 142 | print('Accuracy of the network on the 10000 test images: %d %%' % ( 143 | 100 * correct / total)) 144 | 145 | #정확히 맞추는 것과 못 맞추는 것 구분 146 | class_correct = list(0. for i in range(10)) 147 | class_total = list(0. 
for i in range(10)) 148 | for data in testloader: 149 | images, labels = data 150 | outputs = net(Variable(images)) 151 | _, predicted = torch.max(outputs.data, 1) 152 | c = (predicted == labels).squeeze() 153 | for i in range(4): 154 | label = labels[i] 155 | class_correct[label] += c[i] 156 | class_total[label] += 1 157 | 158 | for i in range(10): 159 | print('Accuracy of %5s : %2d %%' % ( 160 | classes[i], 100 * class_correct[i] / class_total[i])) 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /pytorch_basic/pytorch_basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jun 6 16:24:52 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | """Pytorch Intro""" 10 | 11 | import torch 12 | 13 | import os 14 | os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1" 15 | 16 | #GPU 17 | dtype = torch.cuda.FloatTensor 18 | 19 | ##Tensors 20 | x = torch.Tensor(5,3).type(dtype) 21 | x = torch.rand(5,3).type(dtype) 22 | x.size() 23 | 24 | ##Operations 25 | y = torch.rand(5,3).type(dtype) 26 | print(x+y) 27 | 28 | #print(torch.add(x,y)) 29 | result = torch.Tensor(5,3).type(dtype) 30 | torch.add(x, y, out=result) 31 | print(result) 32 | 33 | #Indexing 34 | print(x[:, 1]) 35 | 36 | ##Numpy Bridge 37 | 38 | #Convert torch Tensor to numpy Array 39 | a = torch.ones(5) 40 | print(a) 41 | 42 | b = a.numpy() 43 | print(b) 44 | 45 | #Convert numpy array to torch 46 | import numpy as np 47 | a = np.ones(5) 48 | b = torch.from_numpy(a) 49 | np.add(a, 1, out=a) 50 | print(a) 51 | print(b) 52 | 53 | #Cuda Tensors 54 | if torch.cuda.is_available(): 55 | x = x.cuda() 56 | y = y.cuda() 57 | x + y 58 | 59 | 60 | """ Autograd: Automatic differentiation """ 61 | 62 | ##Variable 63 | # If Variable is not a scala, you need to specify arg. 
for backward() 64 | 65 | import torch 66 | from torch.autograd import Variable 67 | 68 | x = Variable(torch.ones(2,2), requires_grad=True).type(dtype) 69 | y = x + 2 70 | print(y) 71 | 72 | z = y * y * 3 73 | out = z.mean() 74 | print(z, out) 75 | 76 | 77 | ##Gradients 78 | out.backward() 79 | 80 | print(x.grad) 81 | 82 | import time 83 | from datetime import timedelta 84 | 85 | start_time = time.monotonic() 86 | x = torch.randn(3) 87 | x = Variable(x, requires_grad=True) 88 | y = x*2 89 | while y.data.norm() < 1000000: 90 | y = y * 2 91 | end_time = time.monotonic() 92 | 93 | print(timedelta(seconds=end_time - start_time)) 94 | 95 | gradients = torch.FloatTensor([0.1, 1.0, 0.0001]) 96 | y.backward(gradients) 97 | 98 | print(x.grad) 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /pytorch_basic/pytorch_nlp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jun 6 17:37:50 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | import torch 10 | import torch.autograd as autograd 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | 15 | torch.manual_seed(1) 16 | 17 | #Creating Tensors 18 | V_data = [1,2,3] 19 | V = torch.Tensor(V_data) 20 | print(V) 21 | 22 | #Create matrix 23 | M_data = [[1,2,3], [4,5,6]] 24 | M = torch.Tensor(M_data) 25 | print(M) 26 | 27 | # Create 3D tensor of size 2*2*2 28 | T_data = [[[1,2],[3,4]], 29 | [[5,6],[7,8]]] 30 | T = torch.Tensor(T_data) 31 | print(T) 32 | 33 | # Index into V and get a scalar 34 | print(V[0]) 35 | 36 | # Index into M and get a vector 37 | print(M[0]) 38 | 39 | # Index into T and get a matrix 40 | print(T[0]) 41 | 42 | x = torch.randn((3, 4, 5)) 43 | print(x) 44 | 45 | ##Operations with Tensors 46 | x = torch.Tensor([1., 2., 3.]) 47 | y = torch.Tensor([4., 5., 6.]) 48 | z = x + y 49 | print(z) 50 | 51 | ##Concat 52 | # By default, it concatenates along the first axis (concatenates rows) 53 | x_1 = torch.randn(2, 5) 54 | y_1 = torch.randn(3, 5) 55 | z_1 = torch.cat([x_1, y_1]) 56 | print(z_1) 57 | 58 | # Concatenate columns: 59 | x_2 = torch.randn(2, 3) 60 | y_2 = torch.randn(2, 5) 61 | # second arg specifies which axis to concat along 62 | z_2 = torch.cat([x_2, y_2], 1) 63 | print(z_2) 64 | 65 | # If your tensors are not compatible, torch will complain. Uncomment to see the error 66 | # torch.cat([x_1, x_2]) 67 | 68 | 69 | ##Reshaping Tensors 70 | x = torch.randn(2,3,4) 71 | print(x) 72 | print(x.view(2,12)) #2rows with 12 col. 73 | print(x.view(2,-1)) #Same, If one of the dim. is -1, its size can be inferred 74 | 75 | #Comp. Graphs and Auto Diff: How your data is combeind 76 | 77 | # Variables wrap tensor objects 78 | x = autograd.Variable(torch.Tensor([1., 2., 3]), requires_grad=True) 79 | # You can access the data with the .data attribute 80 | print(x.data) 81 | 82 | # You can also do all the same operations you did with tensors with Variables. 83 | y = autograd.Variable(torch.Tensor([4., 5., 6]), requires_grad=True) 84 | z = x + y 85 | print(z.data) 86 | 87 | # BUT z knows something extra. 
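# (Added note: z was created by an operation on Variables, so autograd records how it was
#  produced, while leaf Variables like x and y have no creator. In the very old PyTorch
#  releases this script targets, that attribute was named .creator rather than .grad_fn,
#  which is presumably why the grad_fn prints below are marked as not working.)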
88 | #print(z.grad_fn) does not work 89 | 90 | s = z.sum() 91 | print(s) 92 | #print(s.grad_fn) does not work 93 | 94 | s.backward() 95 | print(x.grad) 96 | 97 | ##Sumamry 98 | 99 | x = torch.randn((2,2)) 100 | y = torch.randn((2,2)) 101 | 102 | z= x + y 103 | 104 | var_x = autograd.Variable(x) 105 | var_y = autograd.Variable(y) 106 | 107 | var_z = var_x + var_y 108 | print(var_z.grad_fn) 109 | 110 | var_z_data = var_z.data # Get the wrapped Tensor object out of var_z... 111 | new_var_z = autograd.Variable(var_z_data) 112 | 113 | print(new_var_z.grad_fn) 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /pytorch_basic/pytorch_nlp2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jun 6 17:37:50 2017 5 | 6 | @author: ryan 7 | """ 8 | 9 | import torch 10 | import torch.autograd as autograd 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | 15 | torch.manual_seed(1) 16 | 17 | lin = nn.Linear(5,3) 18 | data = autograd.Variable(torch.randn(2,5)) 19 | #차원수 변환 20 | print(lin(data)) 21 | 22 | #Non linearity 23 | print(data) 24 | print(F.relu(data)) 25 | 26 | # Softmax is also in torch.functional 27 | data = autograd.Variable(torch.randn(5)) 28 | print(data) 29 | print(F.softmax(data)) 30 | print(F.softmax(data).sum()) # Sums to 1 because it is a distribution! 31 | print(F.log_softmax(data)) # theres also log_softmax 32 | 33 | #BOW 모델 연습 34 | 35 | data = [("me gusta comer en la cafeteria".split(), "SPANISH"), 36 | ("Give it to me".split(), "ENGLISH"), 37 | ("No creo que sea una buena idea".split(), "SPANISH"), 38 | ("No it is not a good idea to get lost at sea".split(), "ENGLISH")] 39 | 40 | test_data = [("Yo creo que si".split(), "SPANISH"), 41 | ("it is lost on me".split(), "ENGLISH")] 42 | 43 | # word_to_ix maps each word in the vocab to a unique integer, which will be its 44 | # index into the Bag of words vector 45 | word_to_ix = {} 46 | for sent, _ in data + test_data: 47 | for word in sent: 48 | if word not in word_to_ix: 49 | word_to_ix[word] = len(word_to_ix) 50 | print(word_to_ix) 51 | 52 | VOCAB_SIZE = len(word_to_ix) 53 | NUM_LABELS = 2 54 | 55 | class BoWClassifier(nn.Module): #inheriting from nn.Module! 56 | def __init__(self, num_labels, vocab_size): 57 | # calls the init function of nn.Module. Dont get confused by syntax, 58 | # just always do it in an nn.Module 59 | super(BoWClassifier, self).__init__() 60 | """ 61 | 상속하게 되면 명확히 상속된 클래스 이름을 한정자로 부모 클래스의 속성과 62 | 메소드를 접근 할 수 있지만 super()를 이용하여 부모 클래스를 접근 가능 63 | Super는 하나의 클래스이다. 64 | Super를 지정하고 접근하면 클래스의 속성과 메소드를 접근해서 처리 가능 65 | 주로 오버라이딩을 작성할 때 super를 이용하여 상위 속성이나 메소드를 참조 66 | """ 67 | 68 | # Define the parameters that you will need. In this case, we need A and b, 69 | # the parameters of the affine mapping. 70 | # Torch defines nn.Linear(), which provides the affine map. 71 | # Make sure you understand why the input dimension is vocab_size 72 | # and the output is num_labels! 73 | self.linear = nn.Linear(vocab_size, num_labels) 74 | 75 | # NOTE! The non-linearity log softmax does not have parameters! 
So we don't need 76 | # to worry about that here 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /pytorch_basic/pytorch_nlp3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Jun 10 11:48:58 2017 5 | 6 | @author: ryan 7 | http://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html 8 | 9 | """ 10 | 11 | import torch 12 | import torch.autograd as autograd 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | import torch.optim as optim 16 | 17 | torch.manual_seed(1) 18 | 19 | word_to_ix = {"안녕": 0, "반가워": 1} 20 | embeds = nn.Embedding(2, 5) # 2 words in vocab, 5 dimensional embeddings 21 | lookup_tensor = torch.LongTensor([word_to_ix["안녕"]]) 22 | hello_embed = embeds(autograd.Variable(lookup_tensor)) 23 | print(hello_embed) 24 | 25 | ## 26 | CONTEXT_SIZE = 2 27 | EMBEDDING_DIM = 10 28 | 29 | # We will use Shakespeare Sonnet 2 30 | test_sentence = """미국 로스앤젤레스에 사는 배우 척 매카시는 사람들과 산책을 해주고 돈을 번다. 지난해 그가 시작한 '친구 대여(Rent-a-Friend)'는 새로운 형태의 비즈니스다. 매카시는 일감이 많지 않은 무명 배우였지만 이 부업은 조수들을 고용해야 할 만큼 번창하고 있다. 다른 도시와 외국에서도 '출장 산책' 주문이 쇄도한다. 31 | 32 | 매카시는 집 근처 공원과 거리를 고객과 함께 걸으면서 이야기를 나누는 대가로 1마일(1.6㎞)에 7달러를 받는다. 사회적 관계를 구매 가능한 상품으로 포장한 셈이다. 이름 붙이자면 '고독 비즈니스'다. 그는 영국 일간지 가디언과의 인터뷰에서 "혼자 산책하기 두렵거나 친구 없는 사람으로 비칠까봐 걱정하는 사람이 많았다"며 "자기 이야기를 누가 들어준다는 데 기뻐하며 다시 나를 찾는다"고 했다. 33 | 34 | 20~30대에서는 미혼과 만혼(晩婚), 40대 이후로는 이혼과 고령화 등으로 1인 가구가 빠르게 늘어가는 한국 사회에서 고독은 강 건너 불구경이 아니다. 우리는 페이스북·트위터·인스타그램 같은 소셜미디어로 긴밀하게 연결돼 있지만 관계의 응집력은 어느 때보다 느슨하다. '혼밥' '혼술' '혼영(나 홀로 영화)' '혼행(나 홀로 여행)' 같은 소비 패턴이 방증한다. 외로움을 감추기보다 즐기려는 경향도 나타난다. Why?는 예스24에 의뢰해 지난 1~5일 설문조사를 했다. 5864명(여성 4398명)이 응답했다. 고독을 바라보는 한국인의 태도가 드러났다. 35 | """.split() 36 | # we should tokenize the input, but we will ignore that for now 37 | # build a list of tuples. Each tuple is ([ word_i-2, word_i-1 ], target word) 38 | 39 | trigrams = [([test_sentence[i], test_sentence[i + 1]], test_sentence[i + 2]) 40 | for i in range(len(test_sentence) -2)] 41 | 42 | #중복 단어 제외 및 일반 단어 넣어 주기 43 | vocab = set(test_sentence) 44 | word_to_ix = {word: i for i , word in enumerate(vocab)} 45 | 46 | #https://wikidocs.net/28 47 | 48 | class NGramLanguageModeler(nn.Module): 49 | def __init__(self, vocab_size, embedding_dim, context_size): 50 | super(NGramLanguageModeler, self).__init__() 51 | self.embeddings = nn.Embedding(vocab_size, embedding_dim) 52 | self.linear1 = nn.Linear(context_size * embedding_dim, 128) 53 | self.linear2 = nn.Linear(128, vocab_size) 54 | 55 | def forward(self, inputs): 56 | embeds = self.embeddings(inputs).view((1,-1)) 57 | out = F.relu(self.linear1(embeds)) 58 | out = self.linear2(out) 59 | log_probs = F.log_softmax(out) 60 | return log_probs 61 | 62 | losses = [] 63 | loss_function = nn.NLLLoss() 64 | model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE) 65 | optimizer = optim.SGD(model.parameters(), lr = 0.001) 66 | 67 | for epoch in range(100): 68 | total_loss = torch.Tensor([0]) 69 | for context, target in trigrams: 70 | 71 | #Step1: 입력전처리 (integer indices(색인) 와 변수로 변환) 72 | context_idxs = [word_to_ix[w] for w in context] 73 | context_var = autograd.Variable(torch.LongTensor(context_idxs)) 74 | 75 | #Step2: torch는 gradients를 accumlates한다. 
새로운 instances를 넘기기 전에, 76 | #모든 그레디언트를 오래된 instnaces로 부터 zero out 해야함 77 | model.zero_grad() 78 | 79 | #Step3: 전진 학습을 하며, 다음 단어에 대한 log prob.얻기 80 | log_probs = model(context_var) 81 | 82 | #Step4: log function 사용하기 83 | loss = loss_function(log_probs, autograd.Variable( 84 | torch.LongTensor([word_to_ix[target]]))) 85 | 86 | #Step5: 백프로게이션 실행 후 그레디언트 수치 업데이트 87 | loss.backward() 88 | optimizer.step() 89 | 90 | total_loss += loss.data 91 | losses.append(total_loss) 92 | print(losses) 93 | 94 | 95 | """Exercise: CBow""" 96 | #.view() check 97 | CONTEXT_SIZE = 2 # 2 words to the left, 2 to the right 98 | raw_text = """We are about to study the idea of a computational process. 99 | Computational processes are abstract beings that inhabit computers. 100 | As they evolve, processes manipulate other abstract things called data. 101 | The evolution of a process is directed by a pattern of rules 102 | called a program. People create programs to direct processes. In effect, 103 | we conjure the spirits of the computer with our spells.""".split() 104 | 105 | # By deriving a set from `raw_text`, we deduplicate the array 106 | vocab = set(raw_text) 107 | vocab_size = len(vocab) 108 | 109 | word_to_ix = {word: i for i, word in enumerate(vocab)} 110 | data = [] 111 | 112 | for i in range(2, len(raw_text) - 2): 113 | context = [raw_text[i - 2], raw_text[i - 1], 114 | raw_text[i + 1], raw_text[i + 2]] 115 | target = raw_text[i] 116 | data.append((context, target)) 117 | 118 | class CBOW(nn.Module): 119 | 120 | def __init__(self): 121 | pass 122 | 123 | def forward(self, inputs): 124 | pass 125 | 126 | def make_context_vector(context, word_to_ix): 127 | idxs = [word_to_ix[w] for w in context] 128 | tensor = torch.LongTensor(idxs) 129 | return autograd.Variable(tensor) 130 | 131 | make_context_vector(data[0][0], word_to_ix) 132 | 133 | 134 | 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /pytorch_basic/pytorch_seq2seq(LSTM).py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Jun 10 18:06:39 2017 5 | # Author: Robert Guthrie 6 | http://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html 7 | """ 8 | 9 | import torch 10 | import torch.autograd as autograd 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | 15 | torch.manual_seed(1) 16 | 17 | lstm = nn.LSTM(3,3) #Input dim, output dim (3,3) 18 | inputs = [autograd.Variable(torch.randn((1, 3))) 19 | for _ in range(5)] # make a sequence of length 5 20 | 21 | #hidden state 초기화 22 | hidden = (autograd.Variable(torch.randn(1,1,3)), 23 | autograd.Variable(torch.randn(1,1,3))) 24 | 25 | for i in inputs: 26 | # Step through the sequence one elements at a time. 27 | # after each step, hidden contains the hidden state 28 | out, hidden = lstm(i.view(1,1,-1), hidden) 29 | 30 | # 전체 seq.를 한번에 진행이 가능하다. 31 | # LSTM에서 받은 첫번째 값은 32 | # 두번째는 가장 최근의 hidden state이다. 33 | # 그 이유는, "out"은 모든 hidden state 차례대로 접근 할 수 있고, 34 | # "hidden"은 seq를 진행하며 backprop을 하게 해주기 때문이다. 35 | inputs = torch.cat(inputs).view(len(inputs), 1, -1) 36 | hidden = (autograd.Variable(torch.randn(1, 1, 3)), autograd.Variable( 37 | torch.randn(1,1,3))) 38 | out. 
hidden = lstm(inputs, hidden) 39 | print(out) 40 | print(hidden) 41 | 42 | """LSTM for POS Tagging 43 | 44 | """ 45 | 46 | def prepare_sequence(seq, to_ix): 47 | idxs = [to_ix[w] for w in seq] 48 | tensor = torch.LongTensor(idxs) 49 | return autograd.Variable(tensor) 50 | 51 | training_data = [ 52 | ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]), 53 | ("Everybody read that book".split(), ["NN", "V", "DET", "NN"]) 54 | ] 55 | 56 | word_to_ix = {} 57 | for sent, tags in training_data: 58 | for word in sent: 59 | if word not in word_to_ix: 60 | word_to_ix[word] = len(word_to_ix) 61 | 62 | tag_to_ix = {"DET": 0, "NN": 1, "V": 2} 63 | 64 | #일반적으로 약 32~64 차원이지만, 값을 적게하여 학습이 진행 되면 값이 어떻게 보내는지 체크 65 | EMBEDDING_DIM = 6 66 | HIDDEN_DIM = 6 67 | 68 | #Create the Model 69 | class LSTMTagger(nn.Module): 70 | 71 | def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size): 72 | super(LSTMTagger, self).__init__() 73 | self.hidden_dim = hidden_dim 74 | 75 | self.word_embeddings = nn.Embedding(vocab_size, embedding_dim) 76 | 77 | #LSTM -> input: word embeddings / output: hidden state / dim: hidden_dim 78 | self.lstm = nn.LSTM(embedding_dim, hidden_dim) 79 | 80 | #linear layer는 hidden에서 tag공간으로 변경 81 | self.hidden2tag = nn.Linear(hidden_dim, tagset_size) 82 | self.hidden = self.init_hidden() 83 | 84 | def init_hidden(self): 85 | # The axes semantics are (num_layers, mini_batch_size, hidden_dim) 86 | return (autograd.Variable(torch.zeros(1,1, self.hidden_dim)), 87 | autograd.Variable(torch.zeros(1,1, self.hidden_dim))) 88 | 89 | def forward(self, sentence): 90 | embeds = self.word_embeddings(sentence) 91 | lstm_out, self.hidden = self.lstm( 92 | embeds.view(len(sentence), 1, -1), self.hidden) 93 | tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1)) 94 | tag_scores = F.log_softmax(tag_space) 95 | tag_scores = F.log_softmax(tag_space) 96 | return tag_scores 97 | 98 | #Training Model 99 | model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix)) 100 | loss_function = nn.NLLLoss() 101 | optimizer = optim.SGD(model.parameters(), lr=0.1) 102 | 103 | #학습 전에 성능을 확인해보자 - i: word / j: tag 104 | inputs = prepare_sequence(training_data[0][0], word_to_ix) 105 | tag_scores = model(inputs) 106 | print(tag_scores) 107 | 108 | for epoch in range(300): #toy data이기 때문에 300번만 하는 것, 원래는 그 이상 109 | for sentence, tags in training_data: 110 | #Step1: Pytorch는 gradient를 중첩하는 방식이므로, 각각의 instance들을 명확히 해주는 작업이 필요. 111 | model.zero_grad() 112 | 113 | #또한, hidden state LSTM을 명확히 해주는 것이 필요 114 | #지난 history를 보유하고 있는 instance를 떼어 정보를 공유 115 | model.hidden = model.init_hidden() 116 | 117 | #Step2: input에서 단어의 index형태로 변환시키는 작업 118 | sentence_in = prepare_sequence(sentence, word_to_ix) 119 | targets = prepare_sequence(tags, tag_to_ix) 120 | 121 | #Step3: Run our forward pass. 122 | tag_scores = model(sentence_in) 123 | 124 | #Step4: Compare the loss, gradients, and update the param. by calling optimizer.step() 125 | loss = loss_function(tag_scores, targets) 126 | loss.backward() 127 | optimizer.step() 128 | 129 | #학습 후 점수 확인하기 130 | inputs = prepare_sequence(training_data[0][0], word_to_ix) 131 | tag_scores = model(inputs) 132 | print(tag_scores) 133 | #결과 값을 보면, 예측한 seq는 0 1 2 0 1 (가장 높은 수) 이다. 134 | #문장은 "the dog ate the apple." 135 | #확인해보면, DET, NOUN, VERB, DET, NOUN 이므로 정확한 문장 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /pytorch_basic/sec 6. 
Linear regression wih Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "ename": "NameError", 12 | "evalue": "name 'n' is not defined", 13 | "output_type": "error", 14 | "traceback": [ 15 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 16 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 17 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 18 | "\u001b[0;31mNameError\u001b[0m: name 'n' is not defined" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "import numpy as np\n", 24 | "np.random.seed(1)\n", 25 | "\n", 26 | "x = np.random.rand(n)\n", 27 | "y = x ** np.random.rand(n)\n", 28 | "\n", 29 | "colors = np.random.rand()\n", 30 | "plt.plot(np.unique(x), np.poly1d(np.ployfit(x,y,1))(np.unique(x)))\n", 31 | "\n", 32 | "plt.scatter(x,y, colors, alpha=0.5)\n", 33 | "plt.show()" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 5, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "# Linear Regression model by pytorch" 45 | ] 46 | } 47 | ], 48 | "metadata": { 49 | "anaconda-cloud": {}, 50 | "kernelspec": { 51 | "display_name": "Python [conda root]", 52 | "language": "python", 53 | "name": "conda-root-py" 54 | }, 55 | "language_info": { 56 | "codemirror_mode": { 57 | "name": "ipython", 58 | "version": 3 59 | }, 60 | "file_extension": ".py", 61 | "mimetype": "text/x-python", 62 | "name": "python", 63 | "nbconvert_exporter": "python", 64 | "pygments_lexer": "ipython3", 65 | "version": "3.5.2" 66 | } 67 | }, 68 | "nbformat": 4, 69 | "nbformat_minor": 1 70 | } 71 | -------------------------------------------------------------------------------- /pytorch_basic/text_loader.py: -------------------------------------------------------------------------------- 1 | # References 2 | # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py 3 | # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class 4 | import gzip 5 | from torch.utils.data import Dataset, DataLoader 6 | 7 | 8 | class TextDataset(Dataset): 9 | # Initialize your data, download, etc. 
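# (Added note: __getitem__ below returns a (src, target) pair of strings; target is a raw
#  line from shakespeare.txt.gz and src is the same line lower-cased with spaces stripped,
#  i.e. the loader is set up for a character-level task of restoring spacing and casing.)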
10 | 11 | def __init__(self, filename="./data/shakespeare.txt.gz"): 12 | self.len = 0 13 | with gzip.open(filename, 'rt') as f: 14 | self.targetLines = [x.strip() for x in f if x.strip()] 15 | self.srcLines = [x.lower().replace(' ', '') 16 | for x in self.targetLines] 17 | self.len = len(self.srcLines) 18 | 19 | def __getitem__(self, index): 20 | return self.srcLines[index], self.targetLines[index] 21 | 22 | def __len__(self): 23 | return self.len 24 | 25 | 26 | # Test the loader 27 | if __name__ == "__main__": 28 | dataset = TextDataset() 29 | train_loader = DataLoader(dataset=dataset, 30 | batch_size=3, 31 | shuffle=True, 32 | num_workers=2) 33 | 34 | for i, (src, target) in enumerate(train_loader): 35 | print(i, "data", src) --------------------------------------------------------------------------------
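# Illustrative decoding helper for pytorch_basic/pytorch_seq2seq(LSTM).py above: that
# script prints raw tag_scores and notes in a comment that the argmax sequence 0 1 2 0 1
# corresponds to DET, NN, V, DET, NN. This sketch (not part of the repo) maps the scores
# back to tag names; it assumes the same tag_to_ix = {"DET": 0, "NN": 1, "V": 2} and the
# old Variable-era tensor API used throughout that file.
import torch

def decode_tags(tag_scores, tag_to_ix):
    # tag_scores: (seq_len, num_tags) log-probabilities from the tagger's forward pass
    ix_to_tag = {ix: tag for tag, ix in tag_to_ix.items()}
    _, best = torch.max(tag_scores.data, 1)   # index of the best-scoring tag per word
    return [ix_to_tag[int(ix)] for ix in best.view(-1)]

# e.g. decode_tags(model(prepare_sequence("The dog ate the apple".split(), word_to_ix)), tag_to_ix)
# should give ['DET', 'NN', 'V', 'DET', 'NN'] after the 300-epoch training loop in that script.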