├── .DS_Store
├── .gitignore
├── Algorithm
│   ├── .ipynb_checkpoints
│   │   ├── HelloCoding_Algo-checkpoint.ipynb
│   │   └── algo_basic-checkpoint.ipynb
│   ├── 06-01_calc.py
│   ├── Algo_basic.py
│   ├── HelloCoding_Algo.ipynb
│   ├── IsPrime.py
│   ├── algo_basic.ipynb
│   ├── binary_search_1.py
│   ├── euler_prob1.py
│   ├── graph_algo.py
│   └── selection_sort_2.py
├── Chatbot
│   ├── .ipynb_checkpoints
│   │   ├── qa_chatbot-checkpoint.ipynb
│   │   └── seq2seq-checkpoint.ipynb
│   ├── Slack_Bot
│   │   ├── .Rhistory
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   └── mcbot_chat.cpython-35.pyc
│   │   ├── data
│   │   │   ├── 29일간의실종.txt
│   │   │   ├── desktop.ini
│   │   │   ├── 게리온의무리들.txt
│   │   │   ├── 게임의종말.txt
│   │   │   ├── 경찰청사람들.txt
│   │   │   ├── 사랑과욕망의덫.txt
│   │   │   ├── 연산군.txt
│   │   │   ├── 욕망이타는숲.txt
│   │   │   ├── 위대한개츠비.txt
│   │   │   └── 황야의이리.txt
│   │   ├── lstm_bot.py
│   │   ├── markov-toji.json
│   │   ├── markov_chain_bot.py
│   │   ├── mcbot_chat.py
│   │   ├── modubot.py
│   │   ├── print_bot_id.py
│   │   ├── toji.model
│   │   ├── toji.wakati
│   │   └── toji2.json
│   ├── __init__.py
│   ├── __pycache__
│   │   └── helpers.cpython-36.pyc
│   ├── helpers.py
│   ├── seq2seq.ipynb
│   └── seq2seq_tut.py
├── Dataset
│   └── dataset.md
├── ETC
│   ├── Lec1.txt
│   ├── Lec4.txt
│   └── Subtitle_tools.ipynb
├── HTML
│   └── code_academy.html
├── Kaggle
│   ├── Quora
│   │   └── Quora_shin.ipynb
│   └── What_Cooking
│       └── Cooking.ipynb
├── Keras_Basic
│   ├── .ipynb_checkpoints
│   │   ├── Keras_tutorial_imdb_text_classification-checkpoint.ipynb
│   │   └── Keras_tutorial_text_generation-checkpoint.ipynb
│   ├── Keras_Cheat_Sheet_Python.pdf
│   ├── Keras_basic_fin.py
│   ├── Keras_classification_basic.py
│   ├── Keras_fine_tuning_basic.py
│   ├── Keras_tutorial_imdb_text_classification.ipynb
│   └── Keras_tutorial_text_generation.ipynb
├── Machine_Comprehension
│   ├── Attention_Keras
│   │   ├── .Rhistory
│   │   ├── Attention_Keras_QA_Model.py
│   │   ├── CNNQA_architecture.json
│   │   ├── CNNQA_weights.h5.7z
│   │   ├── Glove.py
│   │   ├── KerasQA.ods
│   │   └── embedding_data.h5
│   ├── DMN_QA
│   │   ├── DynamicMemoryNetwork.pdf
│   │   ├── bAbi.pdf
│   │   ├── dataset
│   │   │   └── babi_tasks_1-20_v1-2.tar.gz
│   │   ├── image
│   │   │   ├── algo_process1.png
│   │   │   ├── algo_process2.png
│   │   │   ├── algo_process3.png
│   │   │   └── babi_dataset.png
│   │   └── qa_chatbot.ipynb
│   └── Readme_MC.md
├── Math_Stat
│   ├── .Rhistory
│   ├── .ipynb_checkpoints
│   │   ├── ML_Basic_Siraj Raval-checkpoint.ipynb
│   │   └── support_vector_machine_lesson-checkpoint.ipynb
│   ├── ML_Basic_Siraj Raval.ipynb
│   ├── Readme.md
│   ├── data.csv
│   └── support_vector_machine_lesson.ipynb
├── Natural Language Generation
│   └── lstm_keras_generation.py
├── Python
│   ├── .ipynb_checkpoints
│   │   ├── Python_Data_Science-Matplotlib-checkpoint.ipynb
│   │   ├── Python_Data_science_toolbox_part1-checkpoint.ipynb
│   │   └── Python_Review-checkpoint.ipynb
│   ├── Cheat_Sheet.py
│   ├── Decorator.py
│   ├── OOP.ipynb
│   ├── Python_Data_Science-Matplotlib.ipynb
│   ├── Python_Data_science_toolbox_part1.ipynb
│   ├── Python_Review.ipynb
│   ├── Visualization
│   │   ├── .ipynb_checkpoints
│   │   │   └── Bokeh-checkpoint.ipynb
│   │   └── Bokeh.ipynb
│   └── attribute.py
├── Quora_insincere
│   ├── .gitignore
│   ├── README.md
│   ├── jupyter_examples
│   │   ├── Data_Prepro.ipynb
│   │   ├── Modeling.ipynb
│   │   ├── data_preprocessing.py
│   │   ├── lstm_kernel_shin.ipynb
│   │   ├── lstm_kernel_simple.ipynb
│   │   ├── test_kernel1.ipynb
│   │   └── test_kernel_ryan.ipynb
│   └── lstm.py
├── Readme.md
├── Tensorflow
│   ├── .gitignore
│   ├── 04_word2vec_eager.py
│   ├── Chatbot_Attention.ipynb
│   ├── TF_README.md
│   ├── nmt_with_attention.ipynb
│   ├── nmt_with_attention_chatbot_kor.ipynb
│   ├── standford_example
│   │   ├── 02_lazy_loading.py
│   │   ├── 02_placeholder.py
│   │   ├── 02_simple_tf.py
│   │   ├── 02_variables.py
│   │   ├── 03_linreg_dataset.py
│   │   ├── 03_linreg_placeholder.py
│   │   ├── 03_linreg_starter.py
│   │   ├── 03_logreg.py
│   │   ├── 03_logreg_placeholder.py
│   │   ├── 03_logreg_starter.py
│   │   ├── 04_linreg_eager.py
│   │   ├── 04_linreg_eager_starter.py
│   │   ├── 04_word2vec.py
│   │   ├── 04_word2vec_eager.py
│   │   ├── 04_word2vec_eager_starter.py
│   │   ├── 04_word2vec_visualize.py
│   │   ├── 05_randomization.py
│   │   ├── 05_variable_sharing.py
│   │   ├── 07_convnet_layers.py
│   │   ├── 07_convnet_mnist.py
│   │   ├── 07_convnet_mnist_starter.py
│   │   ├── 07_run_kernels.py
│   │   ├── 11_char_rnn.py
│   │   ├── kernels.py
│   │   └── word2vec_utils.py
│   └── tf_eagar(Define by run) 튜토리얼.ipynb
├── Text_Classification
│   ├── .DS_Store
│   ├── .ipynb_checkpoints
│   │   ├── Bag of Words Meets Bags of Popcorn-checkpoint.ipynb
│   │   ├── bagofwords_text_classficiation_kaggle-checkpoint.ipynb
│   │   ├── cnn_textclassification_keras-checkpoint.ipynb
│   │   └── seq2seq_keras-checkpoint.ipynb
│   ├── To_Do
│   │   ├── cnn_keras_text_class_kor.py
│   │   ├── hierachical_attention_keras_text_class_eng.py
│   │   ├── lstm_keras_text_class_eng.py
│   │   └── rnn_attention_keras_text_class_eng .py
│   ├── bagofwords_text_classficiation_kaggle.ipynb
│   ├── char_text_classification_keras.py
│   ├── cnn_keras_text_class_imdb.py
│   ├── cnn_keras_text_class_imdb2.py
│   ├── cnn_keras_text_class_imdb2_korean.py
│   ├── cnn_keras_text_class_kaggle_eng.py
│   ├── cnn_pytorch_text_class_kaggle_eng.py
│   ├── nets
│   │   ├── __pycache__
│   │   │   └── text_cnn.cpython-35.pyc
│   │   └── text_cnn.py
│   └── seq2seq_keras.ipynb
├── VAE
│   └── vae_sample.py
├── dataset
│   └── .DS_Store
└── pytorch_basic
    ├── .ipynb_checkpoints
    │   ├── 10.pytorch_rnn-checkpoint.ipynb
    │   ├── Pytorch_basic-checkpoint.ipynb
    │   └── sec 6. Linear regression wih Python-checkpoint.ipynb
    ├── 10.pytorch_rnn.ipynb
    ├── Pytorch Seq2Seq.ipynb
    ├── Pytorch.ipynb
    ├── Pytorch_basic.ipynb
    ├── Pytorch_mnist.ipynb
    ├── Start_Pytorch.ipynb
    ├── cnn_cifar10_pytorch.py
    ├── cnn_text_pytorch.py
    ├── pytorch_basic.py
    ├── pytorch_nlp.py
    ├── pytorch_nlp2.py
    ├── pytorch_nlp3.py
    ├── pytorch_seq2seq(LSTM).py
    ├── sec 6. Linear regression wih Python.ipynb
    ├── seq2seq_models.py
    └── text_loader.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/.DS_Store
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### JetBrains template
3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
5 |
6 | # User-specific stuff:
7 | .idea/**/workspace.xml
8 | .idea/**/tasks.xml
9 | .idea/dictionaries
10 |
11 | # Sensitive or high-churn files:
12 | .idea/**/dataSources/
13 | .idea/**/dataSources.ids
14 | .idea/**/dataSources.xml
15 | .idea/**/dataSources.local.xml
16 | .idea/**/sqlDataSources.xml
17 | .idea/**/dynamic.xml
18 | .idea/**/uiDesigner.xml
19 |
20 | # Gradle:
21 | .idea/**/gradle.xml
22 | .idea/**/libraries
23 |
24 | # CMake
25 | cmake-build-debug/
26 |
27 | # Mongo Explorer plugin:
28 | .idea/**/mongoSettings.xml
29 |
30 | ## File-based project format:
31 | *.iws
32 |
33 | ## Plugin-specific files:
34 |
35 | # IntelliJ
36 | out/
37 |
38 | # mpeltonen/sbt-idea plugin
39 | .idea_modules/
40 |
41 | # JIRA plugin
42 | atlassian-ide-plugin.xml
43 |
44 | # Cursive Clojure plugin
45 | .idea/replstate.xml
46 |
47 | # Crashlytics plugin (for Android Studio and IntelliJ)
48 | com_crashlytics_export_strings.xml
49 | crashlytics.properties
50 | crashlytics-build.properties
51 | fabric.properties
52 | ### Python template
53 | # Byte-compiled / optimized / DLL files
54 | __pycache__/
55 | *.py[cod]
56 | *$py.class
57 |
58 | # C extensions
59 | *.so
60 |
61 | # Distribution / packaging
62 | .Python
63 | build/
64 | develop-eggs/
65 | dist/
66 | downloads/
67 | eggs/
68 | .eggs/
69 | lib/
70 | lib64/
71 | parts/
72 | sdist/
73 | var/
74 | wheels/
75 | *.egg-info/
76 | .installed.cfg
77 | *.egg
78 | MANIFEST
79 |
80 | # PyInstaller
81 | # Usually these files are written by a python script from a template
82 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
83 | *.manifest
84 | *.spec
85 |
86 | # Installer logs
87 | pip-log.txt
88 | pip-delete-this-directory.txt
89 |
90 | # Unit test / coverage reports
91 | htmlcov/
92 | .tox/
93 | .coverage
94 | .coverage.*
95 | .cache
96 | nosetests.xml
97 | coverage.xml
98 | *.cover
99 | .hypothesis/
100 |
101 | # Translations
102 | *.mo
103 | *.pot
104 |
105 | # Django stuff:
106 | *.log
107 | .static_storage/
108 | .media/
109 | local_settings.py
110 |
111 | # Flask stuff:
112 | instance/
113 | .webassets-cache
114 |
115 | # Scrapy stuff:
116 | .scrapy
117 |
118 | # Sphinx documentation
119 | docs/_build/
120 |
121 | # PyBuilder
122 | target/
123 |
124 | # Jupyter Notebook
125 | .ipynb_checkpoints
126 |
127 | # pyenv
128 | .python-version
129 |
130 | # celery beat schedule file
131 | celerybeat-schedule
132 |
133 | # SageMath parsed files
134 | *.sage.py
135 |
136 | # Environments
137 | .env
138 | .venv
139 | env/
140 | venv/
141 | ENV/
142 | env.bak/
143 | venv.bak/
144 |
145 | # Spyder project settings
146 | .spyderproject
147 | .spyproject
148 |
149 | # Rope project settings
150 | .ropeproject
151 |
152 | # mkdocs documentation
153 | /site
154 |
155 | # mypy
156 | .mypy_cache/
157 | ### macOS template
158 | # General
159 | .DS_Store
160 | .AppleDouble
161 | .LSOverride
162 |
163 | # Icon must end with two \r
164 | Icon
165 |
166 | # Thumbnails
167 | ._*
168 |
169 | # Files that might appear in the root of a volume
170 | .DocumentRevisions-V100
171 | .fseventsd
172 | .Spotlight-V100
173 | .TemporaryItems
174 | .Trashes
175 | .VolumeIcon.icns
176 | .com.apple.timemachine.donotpresent
177 |
178 | # Directories potentially created on remote AFP share
179 | .AppleDB
180 | .AppleDesktop
181 | Network Trash Folder
182 | Temporary Items
183 | .apdisk
184 |
185 | .idea/
186 | data_in/
187 | data_out/*
188 | checkpoint/
189 | logs/
190 | OLD/
191 | practice/
192 | scala_data_pre/
193 | target/
194 | .vscode/
195 | .ipynb_checkpoints/
196 | .DS_Store
197 | .DS_Store*
198 | my_test_model/
199 | result/
200 | blog_origin.pkl
201 |
--------------------------------------------------------------------------------
/Algorithm/.ipynb_checkpoints/HelloCoding_Algo-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 1.원래 저장위치에서 문자열을 역순으로 변환하기\n",
8 | "\n",
9 | "문자열 역순 변환\n",
10 | "- 문자열의 길이 알아내기\n",
11 | "- 문자열의 첫번째문자와 마지막 문자 교환\n",
12 | "- 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n",
13 | "\n",
14 | "위의 절차를 반복 (유니코드 한글에 대한 주의)"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {
21 | "collapsed": true
22 | },
23 | "outputs": [],
24 | "source": [
25 | "#문자열 뒤집기 알고리즘\n",
26 | "\n",
27 | "#Sample String\n",
28 | "Sam_string = 'ABCD'"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 3,
34 | "metadata": {
35 | "collapsed": false
36 | },
37 | "outputs": [
38 | {
39 | "data": {
40 | "text/plain": [
41 | "'DCBA'"
42 | ]
43 | },
44 | "execution_count": 3,
45 | "metadata": {},
46 | "output_type": "execute_result"
47 | }
48 | ],
49 | "source": [
50 | "#Python은 심플하게 끝남\n",
51 | "\n",
52 | "def reverseString(str):\n",
53 | " return str[::-1]\n",
54 | "\n",
55 | "reverseString(Sam_string)"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "# 스택 (접착지 메모)\n",
63 | "\n",
64 | "- 스택에는 푸시와 팝이라는 두가지 연산\n",
65 | "- 모든 함수 호출은 호출 스택을 사용\n",
66 | "- 호출 스택은 너무 켜져 메모리를 크게 소모 할 수 도 있음"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 9,
72 | "metadata": {
73 | "collapsed": false
74 | },
75 | "outputs": [
76 | {
77 | "data": {
78 | "text/plain": [
79 | "'D'"
80 | ]
81 | },
82 | "execution_count": 9,
83 | "metadata": {},
84 | "output_type": "execute_result"
85 | }
86 | ],
87 | "source": [
88 | "#Stack을 사용해 보자\n",
89 | "#Sam_string\n",
90 | "# 문자열의 길이 알아내기\n",
91 | "# 문자열의 첫번째문자와 마지막 문자 교환\n",
92 | "# 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n",
93 | "\n",
94 | "len(Sam_string)\n",
95 | "\n",
96 | "Sam_string[0]\n",
97 | "Sam_string[-1]\n"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": 16,
103 | "metadata": {
104 | "collapsed": false
105 | },
106 | "outputs": [],
107 | "source": [
108 | "def reverseString2(str):\n",
109 | " stack = []\n",
110 | " for ch in str:\n",
111 | " stack.append(ch)\n",
112 | " \n",
113 | " result = \"\"\n",
114 | " while len(stack) > 0:\n",
115 | " result += stack.pop()\n",
116 | " \n",
117 | " return result\n",
118 | "\n"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "# 선택 정렬"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "metadata": {
132 | "collapsed": true
133 | },
134 | "outputs": [],
135 | "source": []
136 | }
137 | ],
138 | "metadata": {
139 | "anaconda-cloud": {},
140 | "kernelspec": {
141 | "display_name": "Python [conda root]",
142 | "language": "python",
143 | "name": "conda-root-py"
144 | },
145 | "language_info": {
146 | "codemirror_mode": {
147 | "name": "ipython",
148 | "version": 3
149 | },
150 | "file_extension": ".py",
151 | "mimetype": "text/x-python",
152 | "name": "python",
153 | "nbconvert_exporter": "python",
154 | "pygments_lexer": "ipython3",
155 | "version": "3.5.2"
156 | }
157 | },
158 | "nbformat": 4,
159 | "nbformat_minor": 1
160 | }
161 |
--------------------------------------------------------------------------------
/Algorithm/.ipynb_checkpoints/algo_basic-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 1. Bubble Sort\n",
8 | "\n",
9 | "* performance: O(n^2)\n",
10 | "* space complexity O(1)\n",
11 | "\n",
12 | "Procedure: \n",
13 | "Loop1 \n",
14 | "6,5,3,1 / 5,6,3,1 / 5,3,6,1 /5,3,1,6 \n",
15 | "Loop2 \n",
16 | "3,5,1,6 / 3,1,5,6 / 3,1,5,6 \n",
17 | "Loop3 \n",
18 | "1,3,5,6 "
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {
25 | "collapsed": false
26 | },
27 | "outputs": [
28 | {
29 | "name": "stdout",
30 | "output_type": "stream",
31 | "text": [
32 | "[1, 2, 3, 4, 5, 6]\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | "import unittest\n",
38 | "\n",
39 | "def bubblesort(alist):\n",
40 | " for i in range(len(alist)-1):\n",
41 | " for j in range(len(alist)-1):\n",
42 | " if alist[j] > alist[j+1]:\n",
43 | " alist[j], alist[j+1] = alist[j+1], alist[j]\n",
44 | " return alist\n",
45 | " \n",
46 | "sort = [4,6,1,3,5,2]\n",
47 | "print(bubblesort(sort))\n",
48 | "\n",
49 | "class unit_test(unittest.TestCase):\n",
50 | " def test(self):\n",
51 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([4, 6, 1, 3, 5, 2]))\n",
52 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 4, 3, 1, 2, 5]))\n",
53 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 5, 4, 3, 2, 1]))"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "# 2. Selection Sort\n",
61 | "\n",
62 | "* Worst case performance: O(n^2)\n",
63 | "* Best Case perform: O(n^2)\n",
64 | "* Avg. Case perform: O(n^2)\n",
65 | "* Worst case space complexity: O(n) total, O(1) auxilary\n",
66 | "\n",
67 | "Procedure: \n",
68 | "4,6,1,3,5,2 \n",
69 | "Min: 4 (첫 번째 포인트) \n",
70 | "4,6,1,3,5,2 \n",
71 | "Min: 1 \n",
72 | "\n",
73 | "1,6,4,3,5,2 \n",
74 | "Min: 6 (두 번째 포인트) \n",
75 | "1,6,4,3,5,2 \n",
76 | "Min: 2 \n",
77 | "\n",
78 | "1,2,4,3,5,6 \n",
79 | "Min: 4 (세 번째 포인트) "
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "# 3. Insertion Sort\n",
87 | "\n",
88 | "쉽지만 성능이 낮음\n",
89 | "\n",
90 | "Procedure: \n",
91 | "4,6,1,3,5,2 \n",
92 | "\n",
93 | "4,6 \n",
94 | "4,1,6 \n",
95 | "1,4,6 \n",
96 | "\n",
97 | "1,4,6,3 \n",
98 | "1,4,3,6 \n",
99 | "1,3,4,6 \n"
100 | ]
101 | },
102 | {
103 | "cell_type": "markdown",
104 | "metadata": {},
105 | "source": [
106 | "# 4. Merge Sort\n",
107 | "\n",
108 | "Perform: O(nlogn)\n",
109 | "space complex: O(n)\n",
110 | "\n",
111 | "1. 정렬되지 않은 리스트를 지속적으로 쪼갠다\n",
112 | "2. 정렬된 아이템과 병합한다.\n",
113 | "\n",
114 | "Procedure: \n",
115 | "6,2,4,1,3,6,5,8\n",
116 | "\n",
117 | "Step1: \n",
118 | "6,2,4,1 / 3,7,5,8 \n",
119 | "* 6,2,4,1\n",
120 | "\n",
121 | "6,2 / 4,1 \n",
122 | "6|2 / 4|1\n",
123 | "\n",
124 | "Step2:\n",
125 | "2|6 -> 2,6 -> 1|4 -> 1,4\n",
126 | "2,6 / 1,4 -> 1,2,4,6\n",
127 | "\n",
128 | "Step3:\n",
129 | "3/7 / 5/8\n",
130 | "\n"
131 | ]
132 | }
133 | ],
134 | "metadata": {
135 | "anaconda-cloud": {},
136 | "kernelspec": {
137 | "display_name": "Python [conda root]",
138 | "language": "python",
139 | "name": "conda-root-py"
140 | },
141 | "language_info": {
142 | "codemirror_mode": {
143 | "name": "ipython",
144 | "version": 3
145 | },
146 | "file_extension": ".py",
147 | "mimetype": "text/x-python",
148 | "name": "python",
149 | "nbconvert_exporter": "python",
150 | "pygments_lexer": "ipython3",
151 | "version": "3.5.2"
152 | }
153 | },
154 | "nbformat": 4,
155 | "nbformat_minor": 1
156 | }
157 |
--------------------------------------------------------------------------------
/Algorithm/06-01_calc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sat Jul 29 16:01:08 2017
5 |
6 | @author: ryan
7 |
8 | #TopCoder 06 algorithm
9 | """
10 |
11 | A = []
12 | B = []
13 | C = []
14 |
15 | n = 5
16 | m = 5
17 |
18 | def calc(n, m):
19 | A
20 | for i in range(n):
21 | B
22 | i += 1
23 | print("count {} times B called".format(i))
24 | for j in range(m):
25 | C
26 | j += 1
27 | print("count {} times C called".format(j))
28 |
29 |
30 | calc(n, m)
31 |
32 | #The amount of computation is O(nm)
33 | #On TopCoder, up to 10^7 operations is fine, but it must not exceed 10^8
34 |
--------------------------------------------------------------------------------
/Algorithm/Algo_basic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Mon Jun 12 00:08:58 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | """
10 | Bubble Sort
11 | performance: O(n^2)
12 | space complexity O(1)
13 |
14 | Procedure:
15 | Loop1
16 | 6,5,3,1 / 5,6,3,1 / 5,3,6,1 /5,3,1,6
17 | Loop2
18 | 3,5,1,6 / 3,1,5,6 / 3,1,5,6
19 | Loop3
20 | 1,3,5,6
21 | """
22 |
23 | import unittest
24 |
25 | def bubblesort(alist):
26 | for i in range(len(alist)-1):
27 | for j in range(len(alist)-1):
28 | if alist[j] > alist[j+1]:
29 | alist[j], alist[j+1] = alist[j+1], alist[j]
30 | return alist
31 |
32 | sort = [4,6,1,3,5,2]
33 | bubblesort(sort)
34 |
35 | class unit_test(unittest.TestCase):
36 | def test(self):
37 | self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([4, 6, 1, 3, 5, 2]))
38 | self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 4, 3, 1, 2, 5]))
39 | self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 5, 4, 3, 2, 1]))
40 |
41 |
42 |
--------------------------------------------------------------------------------
/Algorithm/HelloCoding_Algo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 1.원래 저장위치에서 문자열을 역순으로 변환하기\n",
8 | "\n",
9 | "문자열 역순 변환\n",
10 | "- 문자열의 길이 알아내기\n",
11 | "- 문자열의 첫번째문자와 마지막 문자 교환\n",
12 | "- 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n",
13 | "\n",
14 | "위의 절차를 반복 (유니코드 한글에 대한 주의)"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {
21 | "collapsed": true
22 | },
23 | "outputs": [],
24 | "source": [
25 | "#문자열 뒤집기 알고리즘\n",
26 | "\n",
27 | "#Sample String\n",
28 | "Sam_string = 'ABCD'"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 3,
34 | "metadata": {
35 | "collapsed": false
36 | },
37 | "outputs": [
38 | {
39 | "data": {
40 | "text/plain": [
41 | "'DCBA'"
42 | ]
43 | },
44 | "execution_count": 3,
45 | "metadata": {},
46 | "output_type": "execute_result"
47 | }
48 | ],
49 | "source": [
50 | "#Python은 심플하게 끝남\n",
51 | "\n",
52 | "def reverseString(str):\n",
53 | " return str[::-1]\n",
54 | "\n",
55 | "reverseString(Sam_string)"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "# 스택 (접착지 메모)\n",
63 | "\n",
64 | "- 스택에는 푸시와 팝이라는 두가지 연산\n",
65 | "- 모든 함수 호출은 호출 스택을 사용\n",
66 | "- 호출 스택은 너무 켜져 메모리를 크게 소모 할 수 도 있음"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 9,
72 | "metadata": {
73 | "collapsed": false
74 | },
75 | "outputs": [
76 | {
77 | "data": {
78 | "text/plain": [
79 | "'D'"
80 | ]
81 | },
82 | "execution_count": 9,
83 | "metadata": {},
84 | "output_type": "execute_result"
85 | }
86 | ],
87 | "source": [
88 | "#Stack을 사용해 보자\n",
89 | "#Sam_string\n",
90 | "# 문자열의 길이 알아내기\n",
91 | "# 문자열의 첫번째문자와 마지막 문자 교환\n",
92 | "# 문자열의 두번째 문자와 마지막 문자 -1을 서로 교환\n",
93 | "\n",
94 | "len(Sam_string)\n",
95 | "\n",
96 | "Sam_string[0]\n",
97 | "Sam_string[-1]\n"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": 16,
103 | "metadata": {
104 | "collapsed": false
105 | },
106 | "outputs": [],
107 | "source": [
108 | "def reverseString2(str):\n",
109 | " stack = []\n",
110 | " for ch in str:\n",
111 | " stack.append(ch)\n",
112 | " \n",
113 | " result = \"\"\n",
114 | " while len(stack) > 0:\n",
115 | " result += stack.pop()\n",
116 | " \n",
117 | " return result\n",
118 | "\n"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "# 선택 정렬"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "metadata": {
132 | "collapsed": true
133 | },
134 | "outputs": [],
135 | "source": []
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "metadata": {},
140 | "source": [
141 | "## 1.1 문자열에 포함된 문자들이 전부 유일한지 검사 하는 알고리즘\n",
142 | "\n",
143 | "https://www.youtube.com/watch?v=piDwgBqmqKM&list=PLVNY1HnUlO24RlncfRjfoZHnD0YWVsvhq"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": 1,
149 | "metadata": {
150 | "collapsed": true
151 | },
152 | "outputs": [],
153 | "source": [
154 | "test1 = 'ABCD' #True\n",
155 | "test2 = 'ABAD' #False"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {
162 | "collapsed": true
163 | },
164 | "outputs": [],
165 | "source": [
166 | "fkdls;s;skfkfld;sfksld;sjflldldls;a'fkdls;dldldldldls;s;dldlfjdk'"
167 | ]
168 | }
169 | ],
170 | "metadata": {
171 | "anaconda-cloud": {},
172 | "kernelspec": {
173 | "display_name": "Python [conda root]",
174 | "language": "python",
175 | "name": "conda-root-py"
176 | },
177 | "language_info": {
178 | "codemirror_mode": {
179 | "name": "ipython",
180 | "version": 3
181 | },
182 | "file_extension": ".py",
183 | "mimetype": "text/x-python",
184 | "name": "python",
185 | "nbconvert_exporter": "python",
186 | "pygments_lexer": "ipython3",
187 | "version": "3.5.2"
188 | }
189 | },
190 | "nbformat": 4,
191 | "nbformat_minor": 1
192 | }
193 |
--------------------------------------------------------------------------------
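Section 1.1 of HelloCoding_Algo.ipynb above states the problem (are all characters in a string unique?) and defines the test strings 'ABCD' and 'ABAD', but the code cell is left unfinished. A minimal sketch, not part of the repo, assuming plain Python 3 and no restriction on extra memory:

# Check whether every character in a string is unique: O(n) time, O(n) extra space.
def all_unique(s):
    seen = set()
    for ch in s:
        if ch in seen:      # a repeated character means the answer is False
            return False
        seen.add(ch)
    return True

print(all_unique('ABCD'))   # True  (test1 from the notebook)
print(all_unique('ABAD'))   # False (test2 from the notebook)

An equivalent one-liner is `len(set(s)) == len(s)`; the explicit loop just makes the early exit visible.
--------------------------------------------------------------------------------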
/Algorithm/IsPrime.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Jun 18 00:22:29 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | #Check whether a number is prime or not (trial division up to sqrt(num))
10 |
11 | def isPrime(num):
12 |     if num > 1:
13 |         for d in range(2, int(num ** 0.5) + 1):
14 |             if num % d == 0:
15 |                 print("{} is not prime num".format(num))
16 |                 return False
17 |         print("{} is prime num".format(num))
18 |         return True
19 |     else:
20 |         print("input value must be greater than one")
21 |         return False
22 |
23 | a = -3
24 |
25 | isPrime(a)
--------------------------------------------------------------------------------
/Algorithm/algo_basic.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 1. Bubble Sort\n",
8 | "\n",
9 | "* performance: O(n^2)\n",
10 | "* space complexity O(1)\n",
11 | "\n",
12 | "Procedure: \n",
13 | "Loop1 \n",
14 | "6,5,3,1 / 5,6,3,1 / 5,3,6,1 /5,3,1,6 \n",
15 | "Loop2 \n",
16 | "3,5,1,6 / 3,1,5,6 / 3,1,5,6 \n",
17 | "Loop3 \n",
18 | "1,3,5,6 "
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {
25 | "collapsed": false
26 | },
27 | "outputs": [
28 | {
29 | "name": "stdout",
30 | "output_type": "stream",
31 | "text": [
32 | "[1, 2, 3, 4, 5, 6]\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | "import unittest\n",
38 | "\n",
39 | "def bubblesort(alist):\n",
40 | " for i in range(len(alist)-1):\n",
41 | " for j in range(len(alist)-1):\n",
42 | " if alist[j] > alist[j+1]:\n",
43 | " alist[j], alist[j+1] = alist[j+1], alist[j]\n",
44 | " return alist\n",
45 | " \n",
46 | "sort = [4,6,1,3,5,2]\n",
47 | "print(bubblesort(sort))\n",
48 | "\n",
49 | "class unit_test(unittest.TestCase):\n",
50 | " def test(self):\n",
51 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([4, 6, 1, 3, 5, 2]))\n",
52 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 4, 3, 1, 2, 5]))\n",
53 | " self.assertEqual([1, 2, 3, 4, 5, 6], bubblesort([6, 5, 4, 3, 2, 1]))"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "# 2. Selection Sort\n",
61 | "\n",
62 | "* Worst case performance: O(n^2)\n",
63 | "* Best Case perform: O(n^2)\n",
64 | "* Avg. Case perform: O(n^2)\n",
65 | "* Worst case space complexity: O(n) total, O(1) auxilary\n",
66 | "\n",
67 | "Procedure: \n",
68 | "4,6,1,3,5,2 \n",
69 | "Min: 4 (첫 번째 포인트) \n",
70 | "4,6,1,3,5,2 \n",
71 | "Min: 1 \n",
72 | "\n",
73 | "1,6,4,3,5,2 \n",
74 | "Min: 6 (두 번째 포인트) \n",
75 | "1,6,4,3,5,2 \n",
76 | "Min: 2 \n",
77 | "\n",
78 | "1,2,4,3,5,6 \n",
79 | "Min: 4 (세 번째 포인트) "
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "# 3. Insertion Sort\n",
87 | "\n",
88 | "쉽지만 성능이 낮음\n",
89 | "\n",
90 | "Procedure: \n",
91 | "4,6,1,3,5,2 \n",
92 | "\n",
93 | "4,6 \n",
94 | "4,1,6 \n",
95 | "1,4,6 \n",
96 | "\n",
97 | "1,4,6,3 \n",
98 | "1,4,3,6 \n",
99 | "1,3,4,6 \n"
100 | ]
101 | },
102 | {
103 | "cell_type": "markdown",
104 | "metadata": {},
105 | "source": [
106 | "# 4. Merge Sort\n",
107 | "\n",
108 | "Perform: O(nlogn)\n",
109 | "space complex: O(n)\n",
110 | "\n",
111 | "1. 정렬되지 않은 리스트를 지속적으로 쪼갠다\n",
112 | "2. 정렬된 아이템과 병합한다.\n",
113 | "\n",
114 | "Procedure: \n",
115 | "6,2,4,1,3,6,5,8\n",
116 | "\n",
117 | "Step1: \n",
118 | "6,2,4,1 / 3,7,5,8 \n",
119 | "* 6,2,4,1\n",
120 | "\n",
121 | "6,2 / 4,1 \n",
122 | "6|2 / 4|1\n",
123 | "\n",
124 | "Step2: \n",
125 | "2|6 -> 2,6 -> 1|4 -> 1,4 \n",
126 | "2,6 / 1,4 -> 1,2,4,6\n",
127 | "\n",
128 | "Step3: \n",
129 | "3|7 / 5|8\n",
130 | "\n",
131 | "Step4: \n",
132 | "3|7 -> 3,7 -> 5|8 -> 5,8 \n",
133 | "3,7 / 5,8 -> 3,5,7,8\n",
134 | "\n",
135 | "Step5: \n",
136 | "1,2,4,6 / 3,5,7,8 -> 1,2,3,4,5,6,7,8\n",
137 | "\n",
138 | "\n",
139 | "\n"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": null,
145 | "metadata": {
146 | "collapsed": true
147 | },
148 | "outputs": [],
149 | "source": []
150 | }
151 | ],
152 | "metadata": {
153 | "anaconda-cloud": {},
154 | "kernelspec": {
155 | "display_name": "Python [conda root]",
156 | "language": "python",
157 | "name": "conda-root-py"
158 | },
159 | "language_info": {
160 | "codemirror_mode": {
161 | "name": "ipython",
162 | "version": 3
163 | },
164 | "file_extension": ".py",
165 | "mimetype": "text/x-python",
166 | "name": "python",
167 | "nbconvert_exporter": "python",
168 | "pygments_lexer": "ipython3",
169 | "version": "3.5.2"
170 | }
171 | },
172 | "nbformat": 4,
173 | "nbformat_minor": 1
174 | }
175 |
--------------------------------------------------------------------------------
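The merge-sort cell in algo_basic.ipynb above walks through the split/merge procedure (O(n log n) time, O(n) space) but stops short of code. A minimal sketch of that procedure, not a file in the repo:

# Minimal merge-sort sketch following the split/merge walk-through above.
def merge_sort(alist):
    if len(alist) <= 1:
        return alist
    mid = len(alist) // 2
    left = merge_sort(alist[:mid])     # keep splitting the unsorted list
    right = merge_sort(alist[mid:])
    # merge the two sorted halves back together
    merged, i, j = [], 0, 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            merged.append(left[i]); i += 1
        else:
            merged.append(right[j]); j += 1
    merged.extend(left[i:])
    merged.extend(right[j:])
    return merged

print(merge_sort([6, 2, 4, 1, 3, 7, 5, 8]))   # [1, 2, 3, 4, 5, 6, 7, 8]
--------------------------------------------------------------------------------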
/Algorithm/binary_search_1.py:
--------------------------------------------------------------------------------
1 | #Binary search
2 |
3 | alist = [2,4,6,8,10,14,21]
4 |
5 | exp_val = 14
6 |
7 | #hint1
8 | low = 0
9 | high = len(alist) - 1
10 | mid = (low + high) // 2
11 | guess = alist[mid]
12 |
13 | #hint2
14 | if guess < exp_val:
15 | low = mid + 1
16 |
17 | #hint3: cannot solve
18 | def binarySearch(list, item):
19 | low = 0
20 | high = len(list) - 1
21 |
22 | while low <= high: # Key point
23 | mid = (low + high) // 2
24 | guess = list[mid]
25 | if guess == item:
26 | return mid
27 | elif guess > item:
28 | high = mid - 1
29 | else:
30 | low = mid + 1
31 | return None
32 |
33 | binarySearch(alist, exp_val)
34 |
35 |
36 |
37 | #my code
38 | i = 0
39 | while i < 100:
40 | i += 1
41 | if guess < exp_val:
42 | mid = (mid + high) // 2
43 | guess = alist[mid]
44 | elif guess > exp_val:
45 | mid = (mid + low) // 2
46 | guess = alist[mid]
47 | else:
48 | print("Value Location {}, Value {}".format(mid, guess))
49 | break
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
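binary_search_1.py above codes the search loop by hand. For comparison, the same lookup can be done with Python's standard-library bisect module; a small sketch (bisect_left only returns an insertion point, so a hit still has to be verified):

# Equivalent lookup with the standard-library bisect module (sketch).
import bisect

alist = [2, 4, 6, 8, 10, 14, 21]
exp_val = 14

idx = bisect.bisect_left(alist, exp_val)        # insertion point for exp_val
if idx < len(alist) and alist[idx] == exp_val:
    print("Value Location {}, Value {}".format(idx, alist[idx]))   # 5, 14
else:
    print("Value not found")
--------------------------------------------------------------------------------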
/Algorithm/euler_prob1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Algorithm/euler_prob1.py
--------------------------------------------------------------------------------
/Algorithm/graph_algo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Algorithm/graph_algo.py
--------------------------------------------------------------------------------
/Algorithm/selection_sort_2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Jun 25 22:49:03 2017
5 |
6 | @author: ryan
7 | """
8 |
9 |
--------------------------------------------------------------------------------
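selection_sort_2.py above is still only a file header. A minimal selection-sort sketch matching the procedure described in algo_basic.ipynb (repeatedly pick the minimum of the unsorted tail and swap it to the front); not part of the repo:

# Minimal selection-sort sketch (the procedure walked through in algo_basic.ipynb).
def selection_sort(alist):
    for i in range(len(alist) - 1):
        # find the index of the minimum element in the unsorted tail alist[i:]
        min_idx = i
        for j in range(i + 1, len(alist)):
            if alist[j] < alist[min_idx]:
                min_idx = j
        # swap it into position i
        alist[i], alist[min_idx] = alist[min_idx], alist[i]
    return alist

print(selection_sort([4, 6, 1, 3, 5, 2]))   # [1, 2, 3, 4, 5, 6]
--------------------------------------------------------------------------------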
/Chatbot/.ipynb_checkpoints/seq2seq-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 2
6 | }
7 |
--------------------------------------------------------------------------------
/Chatbot/Slack_Bot/.Rhistory:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/.Rhistory
--------------------------------------------------------------------------------
/Chatbot/Slack_Bot/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
--------------------------------------------------------------------------------
/Chatbot/Slack_Bot/__pycache__/mcbot_chat.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/__pycache__/mcbot_chat.cpython-35.pyc
--------------------------------------------------------------------------------
/Chatbot/Slack_Bot/data/desktop.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/data/desktop.ini
--------------------------------------------------------------------------------
/Chatbot/Slack_Bot/lstm_bot.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Jun 25 20:02:25 2017
5 | Novel download site: http://blog.naver.com/PostView.nhn?blogId=dmswjd5366&logNo=220010721513
6 | @author: ryan
7 | """
8 |
9 | import codecs
10 | from bs4 import BeautifulSoup
11 | from keras.models import Sequential
12 | from keras.layers import Dense, Activation, Dropout
13 | from keras.layers import LSTM
14 | from keras.optimizers import RMSprop
15 | from keras.utils.data_utils import get_file
16 | import numpy as np
17 | import random, sys
18 |
19 | import os
20 | import pandas as pd
21 | import json
22 |
23 | #import chardet
24 |
25 | #data = pd.read_csv("/home/ryan/nlp_ryan/Chatbot/Slack_Bot/data/toji1.txt", "r", encoding="utf-8")
26 | fp = codecs.open("/home/ryan/nlp_ryan/Chatbot/Slack_Bot/data/toji1.txt", "r", encoding="utf-8")
27 | #soup = BeautifulSoup(fp, "html.parser")
28 | #body = soup.select_one("body")
29 | #text = body.getText() + " "
30 | text = fp.read() + " "  # read the plain-text novel (the BeautifulSoup path above is left commented out)
31 | print('Corpus length: ', len(text))
32 | # Read the characters one by one and assign IDs
33 | chars = sorted(list(set(text)))
34 | print('Number of distinct characters:', len(chars))
35 | char_indices = dict((c, i) for i, c in enumerate(chars)) # character → ID
36 | indices_char = dict((i, c) for i, c in enumerate(chars)) # ID → character
37 | # Cut the text into maxlen-character windows and record the character that follows each window
38 | maxlen = 20
39 | step = 3
40 | sentences = []
41 | next_chars = []
42 | for i in range(0, len(text) - maxlen, step):
43 | sentences.append(text[i: i + maxlen])
44 | next_chars.append(text[i + maxlen])
45 | print('Number of training phrases:', len(sentences))
46 | print('Converting text into ID vectors...')
47 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
48 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
49 | for i, sentence in enumerate(sentences):
50 | for t, char in enumerate(sentence):
51 | X[i, t, char_indices[char]] = 1
52 | y[i, char_indices[next_chars[i]]] = 1
53 | # Build the model (LSTM)
54 | print('Building the model...')
55 | model = Sequential()
56 | model.add(LSTM(128, input_shape=(maxlen, len(chars))))
57 | model.add(Dense(len(chars)))
58 | model.add(Activation('softmax'))
59 | optimizer = RMSprop(lr=0.01)
60 | model.compile(loss='categorical_crossentropy', optimizer=optimizer)
61 | # Sample a candidate from the prediction array
62 | def sample(preds, temperature=1.0):
63 | preds = np.asarray(preds).astype('float64')
64 | preds = np.log(preds) / temperature
65 | exp_preds = np.exp(preds)
66 | preds = exp_preds / np.sum(exp_preds)
67 | probas = np.random.multinomial(1, preds, 1)
68 | return np.argmax(probas)
69 | # Repeatedly train and generate text
70 | for iteration in range(1, 60):
71 | print()
72 | print('-' * 50)
73 | print('Iteration =', iteration)
74 | model.fit(X, y, batch_size=128, nb_epoch=1) #
75 | # Pick a random seed position in the text
76 | start_index = random.randint(0, len(text) - maxlen - 1)
77 | # Generate sentences at several diversity (temperature) settings
78 | for diversity in [0.2, 0.5, 1.0, 1.2]:
79 | print()
80 | print('--- diversity = ', diversity)
81 | generated = ''
82 | sentence = text[start_index: start_index + maxlen]
83 | generated += sentence
84 | print('--- seed = "' + sentence + '"')
85 | sys.stdout.write(generated)
86 | # Generate text automatically from the seed
87 | for i in range(400):
88 | x = np.zeros((1, maxlen, len(chars)))
89 | for t, char in enumerate(sentence):
90 | x[0, t, char_indices[char]] = 1.
91 | # Predict the next character
92 | preds = model.predict(x, verbose=0)[0]
93 | next_index = sample(preds, diversity)
94 | next_char = indices_char[next_index]
95 | # Print the output
96 | generated += next_char
97 | sentence = sentence[1:] + next_char
98 | sys.stdout.write(next_char)
99 | sys.stdout.flush()
100 | print()
--------------------------------------------------------------------------------
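The sample() helper in lstm_bot.py above rescales the predicted character distribution by a temperature before drawing the next character. A self-contained toy demonstration of that rescaling; the probabilities below are made up purely for illustration:

import numpy as np

def rescale(preds, temperature):
    """Same log/exp temperature rescaling used by sample() in lstm_bot.py."""
    preds = np.asarray(preds).astype('float64')
    preds = np.exp(np.log(preds) / temperature)
    return preds / np.sum(preds)

p = [0.5, 0.3, 0.15, 0.05]     # hypothetical next-character probabilities
print(rescale(p, 0.2))         # low temperature -> sharper, near-greedy choice
print(rescale(p, 1.2))         # high temperature -> flatter, more random choice

This is why the training loop prints samples at diversity 0.2 through 1.2: low values reproduce frequent patterns, high values take more risks.
--------------------------------------------------------------------------------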
/Chatbot/Slack_Bot/markov_chain_bot.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Jun 25 20:02:25 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | import codecs
10 | from bs4 import BeautifulSoup
11 | from konlpy.tag import Twitter
12 | import urllib.request
13 | import os, re, json, random
14 | # Build the Markov chain dictionary --- (※1)
15 | def make_dic(words):
16 | tmp = ["@"]
17 | dic = {}
18 | for word in words:
19 | tmp.append(word)
20 | if len(tmp) < 3: continue
21 | if len(tmp) > 3: tmp = tmp[1:]
22 | set_word3(dic, tmp)
23 | if word == ".":
24 | tmp = ["@"]
25 | continue
26 | return dic
27 | # Register data in the dictionary --- (※2)
28 | def set_word3(dic, s3):
29 | w1, w2, w3 = s3
30 | if not w1 in dic: dic[w1] = {}
31 | if not w2 in dic[w1]: dic[w1][w2] = {}
32 | if not w3 in dic[w1][w2]: dic[w1][w2][w3] = 0
33 | dic[w1][w2][w3] += 1
34 |
35 | # Build a sentence --- (※3)
36 | def make_sentence(dic):
37 | ret = []
38 | if not "@" in dic: return "no dic"
39 | top = dic["@"]
40 | w1 = word_choice(top)
41 | w2 = word_choice(top[w1])
42 | ret.append(w1)
43 | ret.append(w2)
44 | while True:
45 | w3 = word_choice(dic[w1][w2])
46 | ret.append(w3)
47 | if w3 == ".": break
48 | w1, w2 = w2, w3
49 | ret = "".join(ret)
50 | # Restore word spacing
51 | params = urllib.parse.urlencode({
52 | "_callback": "",
53 | "q": ret
54 | })
55 | # Use the Naver spell checker to fix the spacing.
56 | data = urllib.request.urlopen("https://m.search.naver.com/p/csearch/dcontent/spellchecker.nhn?" + params)
57 | data = data.read().decode("utf-8")[1:-2]
58 | data = json.loads(data)
59 | data = data["message"]["result"]["html"]
60 | #data = soup = BeautifulSoup(data, "html.parser").getText()
61 | data = BeautifulSoup(data, "html.parser").getText()
62 |
63 | # Return the result
64 | return data
65 |
66 | def word_choice(sel):
67 | keys = sel.keys()
68 | return random.choice(list(keys))
69 |
70 | # Read in the source text --- (※4)
71 | toji_file = "toji.txt"
72 | dict_file = "/home/ryan/nlp_ryan/Chatbot/Slack_Bot/markov-toji.json"
73 |
74 | if not os.path.exists(dict_file):
75 | # Read the Toji novel text file
76 | fp = codecs.open("BEXX0003.txt", "r", encoding="utf-16")
77 | soup = BeautifulSoup(fp, "html.parser")
78 | body = soup.select_one("body > text")
79 | text = body.getText()
80 | text = text.replace("…", "") # temporary workaround: koNLPy currently fails to treat … as punctuation
81 | # Morphological analysis
82 | twitter = Twitter()
83 | malist = twitter.pos(text, norm=True)
84 | words = []
85 | for word in malist:
86 | # Exclude punctuation from the word list (but keep the period)
87 | if not word[1] in ["Punctuation"]:
88 | words.append(word[0])
89 | if word[0] == ".":
90 | words.append(word[0])
91 | # Build the dictionary
92 | dic = make_dic(words)
93 | json.dump(dic, open(dict_file,"w", encoding="utf-8"))
94 | else:
95 | dic = json.load(open(dict_file,"r"))
96 | # Build sentences --- (※6)
97 |
98 |
99 |
100 | for i in range(3):
101 | s = make_sentence(dic)
102 | print(s)
103 | print("---")
--------------------------------------------------------------------------------
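make_dic()/set_word3() in markov_chain_bot.py above build a nested trigram dictionary: dic[w1][w2][w3] counts how often w3 follows the pair (w1, w2), with "@" marking a sentence start and "." closing a sentence. A self-contained toy run of that same logic on an English word list (the Korean morphological analysis and the Naver spell-checker call are skipped here):

# Toy run of the trigram-dictionary logic from markov_chain_bot.py above.
def set_word3(dic, s3):
    w1, w2, w3 = s3
    if not w1 in dic: dic[w1] = {}
    if not w2 in dic[w1]: dic[w1][w2] = {}
    if not w3 in dic[w1][w2]: dic[w1][w2][w3] = 0
    dic[w1][w2][w3] += 1

def make_dic(words):
    tmp = ["@"]
    dic = {}
    for word in words:
        tmp.append(word)
        if len(tmp) < 3: continue
        if len(tmp) > 3: tmp = tmp[1:]
        set_word3(dic, tmp)
        if word == ".": tmp = ["@"]
    return dic

print(make_dic(["I", "like", "tea", ".", "I", "like", "coffee", "."]))
# {'@': {'I': {'like': 2}},
#  'I': {'like': {'tea': 1, 'coffee': 1}},
#  'like': {'tea': {'.': 1}, 'coffee': {'.': 1}}}
--------------------------------------------------------------------------------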
/Chatbot/Slack_Bot/mcbot_chat.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Jun 25 20:02:25 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | import codecs
10 | from bs4 import BeautifulSoup
11 | from konlpy.tag import Twitter
12 | import urllib.request
13 | import os, re, json, random
14 |
15 | # Read in the dictionary --- (※4)
16 | dict_file = "/home/ryan/nlp_ryan/Chatbot/Slack_Bot/markov-toji.json"
17 | dic = json.load(open(dict_file,"r"))
18 |
19 | # Build a sentence --- (※3)
20 | def make_sentence(dic):
21 | ret = []
22 | if not "@" in dic: return "no dic"
23 | top = dic["@"]
24 | w1 = word_choice(top)
25 | w2 = word_choice(top[w1])
26 | ret.append(w1)
27 | ret.append(w2)
28 | while True:
29 | w3 = word_choice(dic[w1][w2])
30 | ret.append(w3)
31 | if w3 == ".": break
32 | w1, w2 = w2, w3
33 | ret = "".join(ret)
34 | # Restore word spacing
35 | params = urllib.parse.urlencode({
36 | "_callback": "",
37 | "q": ret
38 | })
39 | # Use the Naver spell checker to fix the spacing.
40 | data = urllib.request.urlopen("https://m.search.naver.com/p/csearch/dcontent/spellchecker.nhn?" + params)
41 | data = data.read().decode("utf-8")[1:-2]
42 | data = json.loads(data)
43 | data = data["message"]["result"]["html"]
44 | #data = soup = BeautifulSoup(data, "html.parser").getText()
45 | data = BeautifulSoup(data, "html.parser").getText()
46 |
47 | # Return the result
48 | return data
49 |
50 | def word_choice(sel):
51 | keys = sel.keys()
52 | return random.choice(list(keys))
--------------------------------------------------------------------------------
/Chatbot/Slack_Bot/modubot.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Jun 25 15:35:47 2017
5 |
6 | modu-deepnlp
7 | modubot
8 |
9 | http://www.usefulparadigm.com/2016/04/06/creating-a-slack-bot-with-aws-lambda-and-api-gateway/
10 | https://www.fullstackpython.com/blog/build-first-slack-bot-python.html
11 |
12 | @author: ryan
13 | https://hooks.slack.com/services/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
14 |
15 | """
16 |
17 | import sys
18 | sys.path.append('/home/ryan/nlp_ryan/Chatbot/Slack_Bot')
19 | from mcbot_chat import make_sentence
20 | import os, re, json, random
21 |
22 | dict_file = "/home/ryan/nlp_ryan/Chatbot/Slack_Bot/markov-toji.json"
23 | dic = json.load(open(dict_file,"r"))
24 |
25 | import os
26 | import time
27 | from slackclient import SlackClient
28 | import random
29 |
30 | #Bot ID & Token
31 | #slack_client.api_call("api.test")
32 | BOT_NAME = 'modubot'
33 | BOT_ID = 'U5Z492W0J'
34 | slack_token = 'your token'
35 |
36 | #export BOT_NAME='modubot'
37 | #export slack_token='xoxb-203145098018-UFRw9AIzGDiZcuc4aSF1kFdl'
38 |
39 | # instantiate Slack & Twilio clients
40 | slack_client = SlackClient(slack_token)
41 |
42 | #Check if everything is alright
43 | is_ok = slack_client.api_call("users.list").get('ok')
44 |
45 | # find the id of our slack bot
46 | if(is_ok):
47 | for user in slack_client.api_call("users.list").get('members'):
48 | if user.get('name') == BOT_NAME:
49 | print(user.get('id'))
50 |
51 | # how the bot is mentioned on slack
52 | def get_mention(user):
53 | return '<@{user}>'.format(user=user)
54 |
55 | slack_mention = get_mention(BOT_ID)
56 |
57 | #Start Chatbot
58 | SOCKET_DELAY = 1
59 |
60 | def is_private(event):
61 | """Checks if private slack channel"""
62 | return event.get('channel').startswith('D')
63 |
64 | def is_for_me(event):
65 | # check that the event is a message and was not sent by the bot itself
66 | type = event.get('type')
67 | if type and type == 'message' and not(event.get('user')==BOT_ID):
68 | #in case it is a private message
69 | if is_private(event):
70 | return True
71 | #in case it is not a private message
72 | text = event.get('text')
73 | channel = event.get('channel')
74 | if slack_mention in text.strip().split():
75 | return True
76 |
77 | def post_message(message, channel):
78 | slack_client.api_call('chat.postMessage', channel=channel,
79 | text=message, as_user=True)
80 |
81 | import nltk
82 |
83 | def is_hi(message):
84 | tokens = [word.lower() for word in message.strip().split()]
85 | return any(g in tokens
86 | for g in ['안녕', '안녕하세요', '테스트'])
87 |
88 | def is_bye(message):
89 | tokens = [word.lower() for word in message.strip().split()]
90 | return any(g in tokens
91 | for g in ['bye', 'goodbye', 'revoir', 'adios', 'later', 'cya'])
92 |
93 | def say_hi(user_mention):
94 | """Say Hi to a user by formatting their mention"""
95 | response_template = random.choice([make_sentence(dic)])
96 | return response_template.format(mention=user_mention)
97 |
98 | def say_bye(user_mention):
99 | """Say Goodbye to a user"""
100 | response_template = random.choice(['see you later, alligator...',
101 | 'adios amigo',
102 | 'Bye {mention}!',
103 | 'Au revoir!'])
104 | return response_template.format(mention=user_mention)
105 |
106 |
107 | def handle_message(message, user, channel):
108 | if is_hi(message):
109 | user_mention = get_mention(user)
110 | post_message(message=say_hi(user_mention), channel=channel)
111 | elif is_bye(message):
112 | user_mention = get_mention(user)
113 | post_message(message=say_bye(user_mention), channel=channel)
114 |
115 | def run():
116 | if slack_client.rtm_connect():
117 | print('[.] modubot is ON...')
118 | while True:
119 | event_list = slack_client.rtm_read()
120 | if len(event_list) > 0:
121 | for event in event_list:
122 | print(event)
123 | if is_for_me(event):
124 | handle_message(message=event.get('text'), user=event.get('user'), channel=event.get('channel'))
125 | time.sleep(SOCKET_DELAY)
126 | else:
127 | print('[!] Connection to Slack failed.')
128 |
129 | if __name__=='__main__':
130 | run()
131 |
--------------------------------------------------------------------------------
/Chatbot/Slack_Bot/print_bot_id.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Jun 25 15:35:47 2017
5 |
6 | modu-deepnlp
7 | modubot
8 | @author: ryan
9 | https://hooks.slack.com/services/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
10 |
11 | """
12 |
13 | import os
14 | from slackclient import SlackClient
15 | BOT_NAME = 'modubot'  # the bot's user name (same value as in modubot.py)
16 | token = 'your token'
17 | slack_client = SlackClient(token)
18 | #slack_client = SlackClient(os.environ.get('SLACK_BOT_TOKEN'))
19 | print(slack_client.api_call("api.test"))
20 | print(slack_client.api_call("api.test"))
21 |
22 | if __name__ == "__main__":
23 | api_call = slack_client.api_call("users.list")
24 | if api_call.get('ok'):
25 | # retrieve all users so we can find our bot
26 | users = api_call.get('members')
27 | for user in users:
28 | if 'name' in user and user.get('name') == BOT_NAME:
29 | print("Bot ID for '" + user['name'] + "' is " + user.get('id'))
30 | else:
31 | print("could not find bot user with the name " + BOT_NAME)
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/Chatbot/Slack_Bot/toji.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/Slack_Bot/toji.model
--------------------------------------------------------------------------------
/Chatbot/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Thu Aug 10 16:42:07 2017
5 |
6 | @author: naver
7 | """
8 |
9 |
--------------------------------------------------------------------------------
/Chatbot/__pycache__/helpers.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Chatbot/__pycache__/helpers.cpython-36.pyc
--------------------------------------------------------------------------------
/Chatbot/helpers.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Thu Aug 10 16:38:16 2017
5 |
6 | @author: naver
7 | """
8 |
9 | import numpy as np
10 |
11 | def batch(inputs, max_sequence_length=None):
12 | """
13 | Args:
14 | inputs:
15 | list of sentences (integer lists)
16 | max_sequence_length:
17 | integer specifying how large should `max_time` dimension be.
18 | If None, maximum sequence length would be used
19 |
20 | Outputs:
21 | inputs_time_major:
22 | input sentences transformed into time-major matrix
23 | (shape [max_time, batch_size]) padded with 0s
24 | sequence_lengths:
25 | batch-sized list of integers specifying amount of active
26 | time steps in each input sequence
27 | """
28 |
29 | sequence_lengths = [len(seq) for seq in inputs]
30 | batch_size = len(inputs)
31 |
32 | if max_sequence_length is None:
33 | max_sequence_length = max(sequence_lengths)
34 |
35 | inputs_batch_major = np.zeros(shape=[batch_size, max_sequence_length], dtype=np.int32) # == PAD
36 |
37 | for i, seq in enumerate(inputs):
38 | for j, element in enumerate(seq):
39 | inputs_batch_major[i, j] = element
40 |
41 | # [batch_size, max_time] -> [max_time, batch_size]
42 | inputs_time_major = inputs_batch_major.swapaxes(0, 1)
43 |
44 | return inputs_time_major, sequence_lengths
45 |
46 |
47 | def random_sequences(length_from, length_to,
48 | vocab_lower, vocab_upper,
49 | batch_size):
50 | """ Generates batches of random integer sequences,
51 | sequence length in [length_from, length_to],
52 | vocabulary in [vocab_lower, vocab_upper]
53 | """
54 | if length_from > length_to:
55 | raise ValueError('length_from > length_to')
56 |
57 | def random_length():
58 | if length_from == length_to:
59 | return length_from
60 | return np.random.randint(length_from, length_to + 1)
61 |
62 | while True:
63 | yield [
64 | np.random.randint(low=vocab_lower,
65 | high=vocab_upper,
66 | size=random_length()).tolist()
67 | for _ in range(batch_size)
68 | ]
--------------------------------------------------------------------------------
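The docstring in helpers.py above describes the time-major padding that batch() performs; a quick check of what it returns for the toy input used in seq2seq.ipynb below (assuming helpers.py is importable from the working directory):

from helpers import batch   # helpers.py above

x = [[5, 7, 8], [6, 3], [3], [1]]
xt, xlen = batch(x)
print(xt)
# [[5 6 3 1]
#  [7 3 0 0]
#  [8 0 0 0]]    <- shape [max_time=3, batch_size=4], padded with 0 (PAD)
print(xlen)
# [3, 2, 1, 1]   <- true length of each input sequence
--------------------------------------------------------------------------------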
/Chatbot/seq2seq.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 5,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np\n",
12 | "\n",
13 | "def batch(inputs, max_sequence_length=None):\n",
14 | " \"\"\"\n",
15 | " Args:\n",
16 | " inputs:\n",
17 | " list of sentences (integer lists)\n",
18 | " max_sequence_length:\n",
19 | " integer specifying how large should `max_time` dimension be.\n",
20 | " If None, maximum sequence length would be used\n",
21 | " \n",
22 | " Outputs:\n",
23 | " inputs_time_major:\n",
24 | " input sentences transformed into time-major matrix \n",
25 | " (shape [max_time, batch_size]) padded with 0s\n",
26 | " sequence_lengths:\n",
27 | " batch-sized list of integers specifying amount of active \n",
28 | " time steps in each input sequence\n",
29 | " \"\"\"\n",
30 | " \n",
31 | " sequence_lengths = [len(seq) for seq in inputs]\n",
32 | " batch_size = len(inputs)\n",
33 | " \n",
34 | " if max_sequence_length is None:\n",
35 | " max_sequence_length = max(sequence_lengths)\n",
36 | " \n",
37 | " inputs_batch_major = np.zeros(shape=[batch_size, max_sequence_length], dtype=np.int32) # == PAD\n",
38 | " \n",
39 | " for i, seq in enumerate(inputs):\n",
40 | " for j, element in enumerate(seq):\n",
41 | " inputs_batch_major[i, j] = element\n",
42 | "\n",
43 | " # [batch_size, max_time] -> [max_time, batch_size]\n",
44 | " inputs_time_major = inputs_batch_major.swapaxes(0, 1)\n",
45 | "\n",
46 | " return inputs_time_major, sequence_lengths\n",
47 | "\n",
48 | "\n",
49 | "def random_sequences(length_from, length_to,\n",
50 | " vocab_lower, vocab_upper,\n",
51 | " batch_size):\n",
52 | " \"\"\" Generates batches of random integer sequences,\n",
53 | " sequence length in [length_from, length_to],\n",
54 | " vocabulary in [vocab_lower, vocab_upper]\n",
55 | " \"\"\"\n",
56 | " if length_from > length_to:\n",
57 | " raise ValueError('length_from > length_to')\n",
58 | "\n",
59 | " def random_length():\n",
60 | " if length_from == length_to:\n",
61 | " return length_from\n",
62 | " return np.random.randint(length_from, length_to + 1)\n",
63 | " \n",
64 | " while True:\n",
65 | " yield [\n",
66 | " np.random.randint(low=vocab_lower,\n",
67 | " high=vocab_upper,\n",
68 | " size=random_length()).tolist()\n",
69 | " for _ in range(batch_size)\n",
70 | " ]"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 6,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": [
79 | "x = [[5, 7, 8], [6, 3], [3], [1]]\n",
80 | "\n",
81 | "xt, xlen = batch(x)"
82 | ]
83 | }
84 | ],
85 | "metadata": {
86 | "kernelspec": {
87 | "display_name": "Python 3",
88 | "language": "python",
89 | "name": "python3"
90 | },
91 | "language_info": {
92 | "codemirror_mode": {
93 | "name": "ipython",
94 | "version": 3
95 | },
96 | "file_extension": ".py",
97 | "mimetype": "text/x-python",
98 | "name": "python",
99 | "nbconvert_exporter": "python",
100 | "pygments_lexer": "ipython3",
101 | "version": "3.6.1"
102 | }
103 | },
104 | "nbformat": 4,
105 | "nbformat_minor": 2
106 | }
107 |
--------------------------------------------------------------------------------
/Dataset/dataset.md:
--------------------------------------------------------------------------------
1 | Premade Datasets
2 | 1. http://research.microsoft.com/en-us/um/redmond/projects/mctest/index.html
3 | MCTest is a freely available set of 660 stories and associated questions intended for research on the machine comprehension of text.
4 | 2. http://www.gutenberg.org/wiki/Gutenberg:Offline_Catalogs
5 | Project Gutenberg has a lot of books
6 | 3. https://catalog.ldc.upenn.edu/LDC2006T13
7 | Web 1T 5-gram Version 1, contributed by Google Inc., contains English word n-grams and their observed frequency counts.
8 | The length of the n-grams ranges from unigrams (single words) to five-grams.
9 | This data is expected to be useful for statistical language modeling, e.g., for machine translation or speech recognition, etc.
10 | 4. http://www.iesl.cs.umass.edu/data
11 | A lot of datasets
12 | 5. http://webdatacommons.org/webtables/
13 | A subset of the HTML tables on the Web contains relational data which can be useful for various applications.
14 | The Web Data Commons project has extracted two large corpora of relational Web tables from the Common Crawl and offers them for public download.
15 | This page provides an overview of the corpora as well as their use cases.
16 | 6. http://statmt.org/ngrams/
17 | Unpruned 5-gram counts and language models trained on 9 billion web pages -- large amounts of raw data in many languages
18 | 7. https://en.wikipedia.org/wiki/Wikipedia:Database_download
19 | Wikipedia Database Download
20 | 8. https://aws.amazon.com/ko/datasets/google-books-ngrams/
21 | A data set containing Google Books n-gram corpora.
22 | 9. https://aws.amazon.com/ko/public-datasets/common-crawl/
23 | The Common Crawl corpus includes web crawl data collected over 8 years.
24 | Common Crawl offers the largest, most comprehensive, open repository of web crawl data on the cloud.
25 | 10. http://commoncrawl.org/the-data/tutorials/
26 | Nice people, haha (tutorials for working with the Common Crawl data)
27 | 11. https://wikireverse.org/data
28 | The full dataset of 36 million links can be downloaded as a torrent.
29 | The download is a tarball containing 4 tab-delimited files.
30 | The data is 1.1 GB when compressed and 5.4 GB when extracted.
31 | 12. https://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.html
32 | This corpus contains a large metadata-rich collection of fictional conversations extracted from raw movie scripts.
33 | 13. https://www.uow.edu.au/~dlee/corpora.htm
34 | several dozens of english corpus
35 | 14. http://research.google.com/research-outreach.html#/research-outreach/research-datasets
36 | Google Datasets
37 | 15. http://www.cs.cornell.edu/home/llee/data/
38 | Collection of Cornell Datasets
39 | 16. https://github.com/rkadlec/ubuntu-ranking-dataset-creator
40 | Ubuntu Dialogue Datasets
41 | 17. http://ebiquity.umbc.edu/resource/html/id/351
42 | The UMBC webBase corpus (http://ebiq.org/r/351) is a dataset containing a collection of English paragraphs with over three billion words
43 | processed from the February 2007 crawl from the Stanford WebBase project (http://bit.ly/WebBase). Compressed, it is about 13GB in size.
44 |
45 |
46 |
47 | Movie Subtitles Datasets (BE AWARE OF COPYRIGHTS!!!)
48 |
49 | http://www.opensubtitles.org/en/search
50 | https://subscene.com/
51 | http://www.moviesubtitles.org/
52 | http://www.divxsubtitles.net/
53 | http://www.subs4free.com/
54 |
55 | https://videoconverter.iskysoft.com/video-tips/download-subtitles.html (15 Best Subtitle Software and Top 10 Subtitle Download Sites)
56 |
57 |
58 |
59 | Q&A Datasets
60 | https://www.researchgate.net/post/What_are_the_datasets_available_for_question_answering_system
61 | https://archive.org/details/stackexchange
62 | https://rajpurkar.github.io/SQuAD-explorer/
63 | https://www.quora.com/Datasets-How-can-I-get-corpus-of-a-question-answering-website-like-Quora-or-Yahoo-Answers-or-Stack-Overflow-for-analyzing-answer-quality
64 | http://jmcauley.ucsd.edu/data/amazon/qa/
65 |
66 |
67 | A lot of Datasets
68 | https://www.reddit.com/r/datasets/comments/3bxlg7/i_have_every_publicly_available_reddit_comment/
69 | https://github.com/caesar0301/awesome-public-datasets#natural-language
70 |
71 |
72 | Miscellaneous
73 | https://github.com/deepmind/rc-data
74 | http://u.cs.biu.ac.il/~koppel/BlogCorpus.htm
75 | http://wiki.dbpedia.org/Downloads2015-10
76 | https://aws.amazon.com/ko/datasets/google-books-ngrams/
--------------------------------------------------------------------------------
/HTML/code_academy.html:
--------------------------------------------------------------------------------
(The dump stripped the HTML markup from this file; only the rendered text survives. Recoverable content of the Codecademy exercise page:)

Title: Ship To It - Company Packing List
Nav: Action List | Profiles | Settings
Search the table

Company Name          | Number of Items to Ship | Next Action
Adam's Greenworks     | 14                      | Package Items
Davie's Burgers       | 2                       | Send Invoice
Baker's Bike Shop     | 3                       | Send Invoice
Miss Sally's Southern | 4                       | Ship
Summit Resort Rentals | 4                       | Ship
Strike Fitness        | 1                       | Enter Order
--------------------------------------------------------------------------------
/Keras_Basic/Keras_Cheat_Sheet_Python.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Keras_Basic/Keras_Cheat_Sheet_Python.pdf
--------------------------------------------------------------------------------
/Keras_Basic/Keras_basic_fin.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Apr 30 15:20:50 2017
5 |
6 | @author: ryan
7 |
8 | Most of the information is from the DataCamp Keras course:
9 | https://www.datacamp.com/community/blog/new-course-deep-learning-in-python-first-keras-2-0-online-course#gs.8RUVmWM
10 |
11 | """
12 |
13 | # Import necessary modules
14 | #import keras
15 | from keras.layers import Dense
16 | from keras.models import Sequential
17 | from keras.datasets import boston_housing
18 | from keras.wrappers.scikit_learn import KerasRegressor
19 | from sklearn.model_selection import cross_val_score
20 | from sklearn.model_selection import KFold
21 | import numpy as np
22 |
23 | (x_train, y_train), (x_test, y_test) = boston_housing.load_data()
24 |
25 | print(x_train.shape, y_train.shape) #(404, 13) / (404,)
26 |
27 | # Save the number of columns in training set: n_cols
28 | n_cols = x_train.shape[1]
29 |
30 | #Define Model for boston data
31 |
32 | # Set up the model: model
33 | model = Sequential()
34 | model.add(Dense(13, activation='relu', input_shape=(n_cols,), kernel_initializer = 'normal'))
35 | # Add the output layer
36 | model.add(Dense(1, kernel_initializer='normal'))
37 | # Compile the model; Adam is generally a good default (the CS231n lectures also suggest Adam when unsure)
38 | model.compile(optimizer='adam', loss='mean_squared_error')
39 |
40 | # Verify that model contains information from compiling
41 | print("Loss function: " + model.loss)
42 |
43 | """
44 | Model training / architecture inspection and visualization
45 | """
46 | model.summary() # print a summary of the model architecture
47 | # Fit the model
48 | history = model.fit(x_train, y_train, epochs=100)
49 | # Evaluate the model on the test set
50 | '''Evaluation'''
51 | # Compute the test loss: score
52 | score = model.evaluate(x_test, y_test)
53 |
54 | # list all data in history
55 | print(history.history.keys())
56 |
57 | # Visualize the training loss
58 | import matplotlib.pyplot as plt
59 |
60 | plt.plot(history.history['loss'])
61 | plt.title('model loss')
62 | plt.ylabel('loss')
63 | plt.xlabel('epoch')
64 | plt.legend(['train'], loc='upper left')
65 | plt.show()
66 |
67 |
68 |
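# --- Cross-validation sketch ---
# KerasRegressor, cross_val_score and KFold are imported above but never used.
# A minimal sketch of how they could be wired together (not part of the original script;
# the epoch count, batch size and fold count are arbitrary assumptions):

def build_regressor():
    m = Sequential()
    m.add(Dense(13, activation='relu', input_shape=(n_cols,), kernel_initializer='normal'))
    m.add(Dense(1, kernel_initializer='normal'))
    m.compile(optimizer='adam', loss='mean_squared_error')
    return m

estimator = KerasRegressor(build_fn=build_regressor, epochs=100, batch_size=32, verbose=0)
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(estimator, x_train, y_train, cv=kfold, scoring='neg_mean_squared_error')
print('5-fold CV MSE: %.2f (+/- %.2f)' % (-cv_scores.mean(), cv_scores.std()))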
--------------------------------------------------------------------------------
/Keras_Basic/Keras_classification_basic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Apr 30 15:20:50 2017
5 |
6 | @author: ryan
7 |
8 | Most of the information is from the DataCamp Keras course:
9 | https://www.datacamp.com/community/blog/new-course-deep-learning-in-python-first-keras-2-0-online-course#gs.8RUVmWM
10 |
11 | """
12 |
13 | # Import necessary modules
14 | import keras
15 | from keras.layers import Dense
16 | from keras.models import Sequential
17 | from keras.utils import to_categorical
18 |
19 | # Convert the target to categorical: target (df, predictors and pred_data come from the course exercise; see the note at the end of this file)
20 | target = to_categorical(df.survived)
21 |
22 | model = Sequential()
23 | model.add(Dense(32, activation='relu', input_shape=(n_cols,)))
24 | model.add(Dense(2, activation='softmax'))
25 | # Compile the model
26 | model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])
27 | # Fit the model
28 | model.fit(predictors, target)
29 |
30 | '''Predictions'''
31 | # Calculate predictions: predictions
32 | predictions = model.predict(pred_data)
33 |
34 | # Calculate predicted probability of survival: predicted_prob_true
35 | predicted_prob_true = predictions[:,1]
36 |
37 | # print predicted_prob_true
38 | print(predicted_prob_true)
39 |
40 | '''
41 | Save and Load
42 | '''
43 |
44 | from keras.models import load_model
45 | model.save('model_file.h5')
46 | my_model = load_model('model_file.h5')  # load the same file that was just saved
47 |
48 |
49 |
50 |
51 |
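# Note: df, predictors, n_cols and pred_data above come from the DataCamp exercise environment
# and are not defined in this file. A hypothetical way to build them from a CSV that has a
# 'survived' column (the file name and preprocessing are assumptions, not the course loader):
import numpy as np
import pandas as pd

df = pd.read_csv('titanic_all_numeric.csv')        # hypothetical file name
predictors = df.drop(['survived'], axis=1).values  # all numeric feature columns
n_cols = predictors.shape[1]
pred_data = predictors[:10]                        # e.g. treat a few rows as "new" data to score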
--------------------------------------------------------------------------------
/Keras_Basic/Keras_fine_tuning_basic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Apr 30 18:27:14 2017
5 |
6 | @author: ryan
7 | """
8 |
9 |
10 | """
11 | Model Optimization
12 |
13 | 1. loss options
14 | - mean_squared_error
15 | - mean_squared_logarithmic_error
16 | - mean_absolute_error
17 | - mean_absolute_percentage_error
18 | - binary_crossentropy
19 | - categorical_crossentropy
20 |
21 | 2. L1/L2 regularization
22 |
23 | from keras import regularizers
24 | model.add(Dense(50, input_dim=100, activation="sigmoid", kernel_regularizer=regularizers.l2(0.01)))
25 |
26 | 3. Dropout -> at test time the weights are scaled by multiplying them by p
27 |
28 | model.add(Dropout(0.5))
29 | model.compile(optimizer=SGD(0.5), loss='categorical_crossentropy', metrics=["acc"])
30 |
31 | 4. Weight initialization
32 | model.add(Dense(100, input_dim=10, activation="sigmoid", kernel_initializer="uniform"))
33 |
34 | 5. Softmax
35 |
36 | model = Sequential()
37 | model.add(Dense(15, input_dim=100, activation='sigmoid', kernel_initializer='glorot_uniform'))
38 | model.add(Dense(10, activation='softmax', kernel_initializer='glorot_uniform'))
39 | model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=["accuracy"])
40 |
41 | """
42 |
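# A minimal, self-contained sketch of points 2-5 above in Keras 2 syntax
# (layer sizes, input_dim=100 and the 0.01/0.5 values are illustrative assumptions):
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import regularizers

reg_model = Sequential()
reg_model.add(Dense(50, input_dim=100, activation='sigmoid',
                    kernel_initializer='glorot_uniform',        # weight initialization
                    kernel_regularizer=regularizers.l2(0.01)))  # L2 weight penalty
reg_model.add(Dropout(0.5))                                     # randomly drop half the units during training
reg_model.add(Dense(10, activation='softmax'))                  # softmax output for 10 classes
reg_model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])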
43 | # Import the SGD optimizer
44 | from keras.optimizers import SGD
45 |
46 | # Create list of learning rates: lr_to_test
47 | lr_to_test = [.000001, 0.01, 1]
48 |
49 | # Loop over learning rates
50 | for lr in lr_to_test:
51 | print('\n\nTesting model with learning rate: %f\n'%lr )
52 |
53 | # Build a new model to test, unaffected by previous runs (get_new_model() is assumed to come from the exercise environment)
54 | model = get_new_model()
55 |
56 | # Create SGD optimizer with specified learning rate: my_optimizer
57 | my_optimizer = SGD(lr=lr)
58 |
59 | # Compile the model
60 | model.compile(optimizer = my_optimizer, loss = 'categorical_crossentropy')
61 |
62 | # Fit the model
63 | model.fit(predictors, target)
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 | """
72 | Model validation
73 |
74 |
75 | model.fit(predictors, target, validation_split=0.3)
76 | Early Stopping
77 | stop training when the validation metric stops improving for a given number of epochs (patience)
78 |
79 | Experimentation
80 | - Experiment with different architectures
81 | - More layers
82 | - Fewer layers
83 | - Layers with more nodes
84 | - Layers with fewer nodes
85 | - Creating a great model requires experimentation
86 |
87 | """"
88 | #Validation Set
89 | # Save the number of columns in predictors: n_cols
90 | n_cols = predictors.shape[1]
91 | input_shape = (n_cols,)
92 |
93 | # Specify the model
94 | model = Sequential()
95 | model.add(Dense(100, activation='relu', input_shape = input_shape))
96 | model.add(Dense(100, activation='relu'))
97 | model.add(Dense(2, activation='softmax'))
98 |
99 | # Compile the model
100 | model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'])
101 |
102 | # Fit the model
103 | hist = model.fit(predictors, target, validation_split=0.3)
104 |
105 | """
106 | #Early Stopping
107 | """
108 |
109 | # Import EarlyStopping
110 | from keras.callbacks import EarlyStopping
111 |
112 | # Save the number of columns in predictors: n_cols
113 | n_cols = predictors.shape[1]
114 | input_shape = (n_cols,)
115 |
116 | # Specify the model
117 | model = Sequential()
118 | model.add(Dense(100, activation='relu', input_shape = input_shape))
119 | model.add(Dense(100, activation='relu'))
120 | model.add(Dense(2, activation='softmax'))
121 |
122 | # Compile the model
123 | model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
124 |
125 | # Define early_stopping_monitor
126 | early_stopping_monitor = EarlyStopping(patience = 2)
127 |
128 | # Fit the model
129 | model.fit(predictors, target, epochs=30, validation_split=0.3, callbacks = [early_stopping_monitor])
130 |
131 | """
132 | ##Experimenting with wider networks
133 |
134 | verbose=False suppresses the per-epoch training log output
135 |
136 | """
137 | # Define early_stopping_monitor
138 | early_stopping_monitor = EarlyStopping(patience=2)
139 |
140 | # Create the new model: model_2
141 | model_2 = Sequential()
142 |
143 | # Add the first and second layers
144 | model_2.add(Dense(100, activation="relu", input_shape=input_shape))
145 | model_2.add(Dense(100, activation="relu"))
146 |
147 | # Add the output layer
148 | model_2.add(Dense(2, activation="softmax"))
149 |
150 | # Compile model_2
151 | model_2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
152 |
153 | # Fit model_1 (model_1 is assumed to be a previously defined, smaller baseline model from the exercise)
154 | model_1_training = model_1.fit(predictors, target, epochs=15, validation_split=0.2, callbacks=[early_stopping_monitor], verbose=False)
155 |
156 | # Fit model_2
157 | model_2_training = model_2.fit(predictors, target, epochs=15, validation_split=0.2, callbacks=[early_stopping_monitor], verbose=False)
158 |
159 | # Create the plot (assumes matplotlib.pyplot has been imported as plt)
160 | plt.plot(model_1_training.history['val_loss'], 'r', model_2_training.history['val_loss'], 'b')
161 | plt.xlabel('Epochs')
162 | plt.ylabel('Validation loss')
163 | plt.show()
164 |
165 |
166 |
167 |
--------------------------------------------------------------------------------
/Keras_Basic/Keras_tutorial_text_generation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "'''Example script to generate text from Nietzsche's writings.\n",
12 | "At least 20 epochs are required before the generated text\n",
13 | "starts sounding coherent.\n",
14 | "It is recommended to run this script on GPU, as recurrent\n",
15 | "networks are quite computationally intensive.\n",
16 | "If you try this script on new data, make sure your corpus\n",
17 | "has at least ~100k characters. ~1M is better.\n",
18 | "'''\n",
19 | "\n",
20 | "from __future__ import print_function\n",
21 | "from keras.models import Sequential\n",
22 | "from keras.layers import Dense, Activation\n",
23 | "from keras.layers import LSTM\n",
24 | "from keras.optimizers import RMSprop\n",
25 | "from keras.utils.data_utils import get_file\n",
26 | "import numpy as np\n",
27 | "import random\n",
28 | "import sys\n",
29 | "\n",
30 | "path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')\n",
31 | "text = open(path).read().lower()\n",
32 | "print('corpus length:', len(text))\n",
33 | "\n",
34 | "chars = sorted(list(set(text)))\n",
35 | "print('total chars:', len(chars))\n",
36 | "char_indices = dict((c, i) for i, c in enumerate(chars))\n",
37 | "indices_char = dict((i, c) for i, c in enumerate(chars))\n",
38 | "\n",
39 | "# cut the text in semi-redundant sequences of maxlen characters\n",
40 | "maxlen = 40\n",
41 | "step = 3\n",
42 | "sentences = []\n",
43 | "next_chars = []\n",
44 | "for i in range(0, len(text) - maxlen, step):\n",
45 | " sentences.append(text[i: i + maxlen])\n",
46 | " next_chars.append(text[i + maxlen])\n",
47 | "print('nb sequences:', len(sentences))\n",
48 | "\n",
49 | "print('Vectorization...')\n",
50 | "X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)\n",
51 | "y = np.zeros((len(sentences), len(chars)), dtype=np.bool)\n",
52 | "for i, sentence in enumerate(sentences):\n",
53 | " for t, char in enumerate(sentence):\n",
54 | " X[i, t, char_indices[char]] = 1\n",
55 | " y[i, char_indices[next_chars[i]]] = 1\n",
56 | "\n",
57 | "\n",
58 | "# build the model: a single LSTM\n",
59 | "print('Build model...')\n",
60 | "model = Sequential()\n",
61 | "model.add(LSTM(128, input_shape=(maxlen, len(chars))))\n",
62 | "model.add(Dense(len(chars)))\n",
63 | "model.add(Activation('softmax'))\n",
64 | "\n",
65 | "optimizer = RMSprop(lr=0.01)\n",
66 | "model.compile(loss='categorical_crossentropy', optimizer=optimizer)\n",
67 | "\n",
68 | "\n",
69 | "def sample(preds, temperature=1.0):\n",
70 | " # helper function to sample an index from a probability array\n",
71 | " preds = np.asarray(preds).astype('float64')\n",
72 | " preds = np.log(preds) / temperature\n",
73 | " exp_preds = np.exp(preds)\n",
74 | " preds = exp_preds / np.sum(exp_preds)\n",
75 | " probas = np.random.multinomial(1, preds, 1)\n",
76 | " return np.argmax(probas)\n",
77 | "\n",
78 | "# train the model, output generated text after each iteration\n",
79 | "for iteration in range(1, 60):\n",
80 | " print()\n",
81 | " print('-' * 50)\n",
82 | " print('Iteration', iteration)\n",
83 | " model.fit(X, y,\n",
84 | " batch_size=128,\n",
85 | " epochs=1)\n",
86 | "\n",
87 | " start_index = random.randint(0, len(text) - maxlen - 1)\n",
88 | "\n",
89 | " for diversity in [0.2, 0.5, 1.0, 1.2]:\n",
90 | " print()\n",
91 | " print('----- diversity:', diversity)\n",
92 | "\n",
93 | " generated = ''\n",
94 | " sentence = text[start_index: start_index + maxlen]\n",
95 | " generated += sentence\n",
96 | " print('----- Generating with seed: \"' + sentence + '\"')\n",
97 | " sys.stdout.write(generated)\n",
98 | "\n",
99 | " for i in range(400):\n",
100 | " x = np.zeros((1, maxlen, len(chars)))\n",
101 | " for t, char in enumerate(sentence):\n",
102 | " x[0, t, char_indices[char]] = 1.\n",
103 | "\n",
104 | " preds = model.predict(x, verbose=0)[0]\n",
105 | " next_index = sample(preds, diversity)\n",
106 | " next_char = indices_char[next_index]\n",
107 | "\n",
108 | " generated += next_char\n",
109 | " sentence = sentence[1:] + next_char\n",
110 | "\n",
111 | " sys.stdout.write(next_char)\n",
112 | " sys.stdout.flush()\n",
113 | " print()"
114 | ]
115 | }
116 | ],
117 | "metadata": {
118 | "anaconda-cloud": {},
119 | "kernelspec": {
120 | "display_name": "Python [conda root]",
121 | "language": "python",
122 | "name": "conda-root-py"
123 | },
124 | "language_info": {
125 | "codemirror_mode": {
126 | "name": "ipython",
127 | "version": 3
128 | },
129 | "file_extension": ".py",
130 | "mimetype": "text/x-python",
131 | "name": "python",
132 | "nbconvert_exporter": "python",
133 | "pygments_lexer": "ipython3",
134 | "version": "3.5.2"
135 | }
136 | },
137 | "nbformat": 4,
138 | "nbformat_minor": 1
139 | }
140 |
--------------------------------------------------------------------------------
/Machine_Comprehension/Attention_Keras/.Rhistory:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/.Rhistory
--------------------------------------------------------------------------------
/Machine_Comprehension/Attention_Keras/CNNQA_weights.h5.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/CNNQA_weights.h5.7z
--------------------------------------------------------------------------------
/Machine_Comprehension/Attention_Keras/KerasQA.ods:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/KerasQA.ods
--------------------------------------------------------------------------------
/Machine_Comprehension/Attention_Keras/embedding_data.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/Attention_Keras/embedding_data.h5
--------------------------------------------------------------------------------
/Machine_Comprehension/DMN_QA/DynamicMemoryNetwork.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/DynamicMemoryNetwork.pdf
--------------------------------------------------------------------------------
/Machine_Comprehension/DMN_QA/bAbi.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/bAbi.pdf
--------------------------------------------------------------------------------
/Machine_Comprehension/DMN_QA/dataset/babi_tasks_1-20_v1-2.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/dataset/babi_tasks_1-20_v1-2.tar.gz
--------------------------------------------------------------------------------
/Machine_Comprehension/DMN_QA/image/algo_process1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/algo_process1.png
--------------------------------------------------------------------------------
/Machine_Comprehension/DMN_QA/image/algo_process2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/algo_process2.png
--------------------------------------------------------------------------------
/Machine_Comprehension/DMN_QA/image/algo_process3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/algo_process3.png
--------------------------------------------------------------------------------
/Machine_Comprehension/DMN_QA/image/babi_dataset.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Machine_Comprehension/DMN_QA/image/babi_dataset.png
--------------------------------------------------------------------------------
/Machine_Comprehension/Readme_MC.md:
--------------------------------------------------------------------------------
1 | # Machine Comprehension
2 |
3 | *Feedback: sungjin7127@gmail.com*
4 |
5 | ## Attention Keras
6 | * [Teaching Machines to Read and Comprehend (Paper) - DeepMind](https://arxiv.org/abs/1506.03340)
7 | * [Code to Generate](https://github.com/deepmind/rc-data)
8 | * [DeepMind Q&A Dataset (CNN, Daily Mail)](http://cs.nyu.edu/~kcho/DMQA/)
9 | * [by Keras](https://github.com/dandxy89/DeepLearning_MachineLearning/tree/master/Keras/Attention)
--------------------------------------------------------------------------------
/Math_Stat/.Rhistory:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Math_Stat/.Rhistory
--------------------------------------------------------------------------------
/Math_Stat/.ipynb_checkpoints/ML_Basic_Siraj Raval-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Intro_to_Math_of_Intelligence"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 3,
13 | "metadata": {
14 | "collapsed": false
15 | },
16 | "outputs": [
17 | {
18 | "name": "stdout",
19 | "output_type": "stream",
20 | "text": [
21 | "Starting gradient descent at b = 0, m = 0, error = 5565.107834483211\n",
22 | "Running...\n",
23 | "After 10000 iterations b = 0.6078985997054931, m = 1.4675440436333027, error = 112.31533427075733\n"
24 | ]
25 | }
26 | ],
27 | "source": [
28 | "#The optimal values of m and b can be actually calculated with way less effort than doing a linear regression. \n",
29 | "#this is just to demonstrate gradient descent\n",
30 | "\n",
31 | "from numpy import *\n",
32 | "\n",
33 | "# y = mx + b\n",
34 | "# m is slope, b is y-intercept\n",
35 | "def compute_error_for_line_given_points(b, m, points):\n",
36 | " totalError = 0\n",
37 | " for i in range(0, len(points)):\n",
38 | " x = points[i, 0]\n",
39 | " y = points[i, 1]\n",
40 | " totalError += (y - (m * x + b)) ** 2\n",
41 | " return totalError / float(len(points))\n",
42 | "\n",
43 | "def step_gradient(b_current, m_current, points, learningRate):\n",
44 | " b_gradient = 0\n",
45 | " m_gradient = 0\n",
46 | " N = float(len(points))\n",
47 | " for i in range(0, len(points)):\n",
48 | " x = points[i, 0]\n",
49 | " y = points[i, 1]\n",
50 | " b_gradient += -(2/N) * (y - ((m_current * x) + b_current))\n",
51 | " m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))\n",
52 | " new_b = b_current - (learningRate * b_gradient)\n",
53 | " new_m = m_current - (learningRate * m_gradient)\n",
54 | " return [new_b, new_m]\n",
55 | "\n",
56 | "def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):\n",
57 | " b = starting_b\n",
58 | " m = starting_m\n",
59 | " for i in range(num_iterations):\n",
60 | " b, m = step_gradient(b, m, array(points), learning_rate)\n",
61 | " return [b, m]\n",
62 | "\n",
63 | "def run():\n",
64 | " points = genfromtxt(\"data.csv\", delimiter=\",\")\n",
65 | " learning_rate = 0.0001\n",
66 | " initial_b = 0 # initial y-intercept guess\n",
67 | " initial_m = 0 # initial slope guess\n",
68 | " num_iterations = 10000\n",
69 | " print(\"Starting gradient descent at b = {0}, m = {1}, error = {2}\".format(initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))\n",
70 | " print(\"Running...\")\n",
71 | " [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)\n",
72 | " print(\"After {0} iterations b = {1}, m = {2}, error = {3}\".format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))\n",
73 | "\n",
74 | "if __name__ == '__main__':\n",
75 | " run()"
76 | ]
77 | }
78 | ],
79 | "metadata": {
80 | "anaconda-cloud": {},
81 | "kernelspec": {
82 | "display_name": "Python [conda root]",
83 | "language": "python",
84 | "name": "conda-root-py"
85 | },
86 | "language_info": {
87 | "codemirror_mode": {
88 | "name": "ipython",
89 | "version": 3
90 | },
91 | "file_extension": ".py",
92 | "mimetype": "text/x-python",
93 | "name": "python",
94 | "nbconvert_exporter": "python",
95 | "pygments_lexer": "ipython3",
96 | "version": "3.5.2"
97 | }
98 | },
99 | "nbformat": 4,
100 | "nbformat_minor": 1
101 | }
102 |
--------------------------------------------------------------------------------
/Math_Stat/ML_Basic_Siraj Raval.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Intro_to_Math_of_Intelligence"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 3,
13 | "metadata": {
14 | "collapsed": false,
15 | "scrolled": true
16 | },
17 | "outputs": [
18 | {
19 | "name": "stdout",
20 | "output_type": "stream",
21 | "text": [
22 | "Starting gradient descent at b = 0, m = 0, error = 5565.107834483211\n",
23 | "Running...\n",
24 | "After 10000 iterations b = 0.6078985997054931, m = 1.4675440436333027, error = 112.31533427075733\n"
25 | ]
26 | }
27 | ],
28 | "source": [
29 | "#The optimal values of m and b can be actually calculated with way less effort than doing a linear regression. \n",
30 | "#this is just to demonstrate gradient descent\n",
31 | "\n",
32 | "from numpy import *\n",
33 | "\n",
34 | "# y = mx + b\n",
35 | "# m is slope, b is y-intercept\n",
36 | "def compute_error_for_line_given_points(b, m, points):\n",
37 | " totalError = 0\n",
38 | " for i in range(0, len(points)):\n",
39 | " x = points[i, 0]\n",
40 | " y = points[i, 1]\n",
41 | " totalError += (y - (m * x + b)) ** 2\n",
42 | " return totalError / float(len(points))\n",
43 | "\n",
44 | "def step_gradient(b_current, m_current, points, learningRate):\n",
45 | " b_gradient = 0\n",
46 | " m_gradient = 0\n",
47 | " N = float(len(points))\n",
48 | " for i in range(0, len(points)):\n",
49 | " x = points[i, 0]\n",
50 | " y = points[i, 1]\n",
51 | " b_gradient += -(2/N) * (y - ((m_current * x) + b_current))\n",
52 | " m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))\n",
53 | " new_b = b_current - (learningRate * b_gradient)\n",
54 | " new_m = m_current - (learningRate * m_gradient)\n",
55 | " return [new_b, new_m]\n",
56 | "\n",
57 | "def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):\n",
58 | " b = starting_b\n",
59 | " m = starting_m\n",
60 | " for i in range(num_iterations):\n",
61 | " b, m = step_gradient(b, m, array(points), learning_rate)\n",
62 | " return [b, m]\n",
63 | "\n",
64 | "def run():\n",
65 | " points = genfromtxt(\"data.csv\", delimiter=\",\")\n",
66 | " learning_rate = 0.0001\n",
67 | " initial_b = 0 # initial y-intercept guess\n",
68 | " initial_m = 0 # initial slope guess\n",
69 | " num_iterations = 10000\n",
70 | " print(\"Starting gradient descent at b = {0}, m = {1}, error = {2}\".format(initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))\n",
71 | " print(\"Running...\")\n",
72 | " [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)\n",
73 | " print(\"After {0} iterations b = {1}, m = {2}, error = {3}\".format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))\n",
74 | "\n",
75 | "if __name__ == '__main__':\n",
76 | " run()"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {
83 | "collapsed": true
84 | },
85 | "outputs": [],
86 | "source": []
87 | }
88 | ],
89 | "metadata": {
90 | "anaconda-cloud": {},
91 | "kernelspec": {
92 | "display_name": "Python [conda root]",
93 | "language": "python",
94 | "name": "conda-root-py"
95 | },
96 | "language_info": {
97 | "codemirror_mode": {
98 | "name": "ipython",
99 | "version": 3
100 | },
101 | "file_extension": ".py",
102 | "mimetype": "text/x-python",
103 | "name": "python",
104 | "nbconvert_exporter": "python",
105 | "pygments_lexer": "ipython3",
106 | "version": "3.5.2"
107 | }
108 | },
109 | "nbformat": 4,
110 | "nbformat_minor": 1
111 | }
112 |
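The first code comment notes that the optimal m and b can be computed directly; for reference, a short closed-form
check (ordinary least squares for a single feature) that is not part of the notebook but should land close to the
gradient-descent result above:

import numpy as np

points = np.genfromtxt("data.csv", delimiter=",")
x, y = points[:, 0], points[:, 1]
m = np.sum((x - x.mean()) * (y - y.mean())) / np.sum((x - x.mean()) ** 2)  # least-squares slope
b = y.mean() - m * x.mean()                                                # least-squares intercept
print("closed-form: b = {:.4f}, m = {:.4f}".format(b, m))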
--------------------------------------------------------------------------------
/Math_Stat/Readme.md:
--------------------------------------------------------------------------------
1 | # Math&Stat Basic
2 |
3 | # *To-Do*
4 |
5 | ## Math & Stat
6 |
7 | * [Fundamentals of Engineering Exam Review](https://www.coursera.org/learn/fe-exam/home/welcome) - Week2 & Week3 (Due July 2017)
8 |
9 | * [Data Science Math Skills](https://www.coursera.org/learn/datasciencemathskills/home/welcome) - (Due August 2017)
10 |
11 | * [Bayesian Statistics: From Concept to Data Analysis](https://www.coursera.org/learn/bayesian-statistics/home/welcome) - (Due 2017)
12 |
13 | ## Python Skills for Data (One Course per week)
14 |
15 | * Introduction to Data Visualization with Python
16 | * pandas Foundation
17 | * Manipulating DataFrames with pandas
18 | * Merging DataFrames with pandas
19 | * Statistical Thinking in Python 1 & 2
20 | * Introduction to Databases in Python
21 | * Supervised & Unsupervised Learning in Python
22 | * Intermediate Python for Data Science
23 |
24 |
25 |
26 | # *References*
27 |
28 | ## Data Preprocessing
29 | - [Chris Albon: Python/R](https://chrisalbon.com/)
30 |
--------------------------------------------------------------------------------
/Math_Stat/data.csv:
--------------------------------------------------------------------------------
1 | 32.502345269453031,31.70700584656992
2 | 53.426804033275019,68.77759598163891
3 | 61.530358025636438,62.562382297945803
4 | 47.475639634786098,71.546632233567777
5 | 59.813207869512318,87.230925133687393
6 | 55.142188413943821,78.211518270799232
7 | 52.211796692214001,79.64197304980874
8 | 39.299566694317065,59.171489321869508
9 | 48.10504169176825,75.331242297063056
10 | 52.550014442733818,71.300879886850353
11 | 45.419730144973755,55.165677145959123
12 | 54.351634881228918,82.478846757497919
13 | 44.164049496773352,62.008923245725825
14 | 58.16847071685779,75.392870425994957
15 | 56.727208057096611,81.43619215887864
16 | 48.955888566093719,60.723602440673965
17 | 44.687196231480904,82.892503731453715
18 | 60.297326851333466,97.379896862166078
19 | 45.618643772955828,48.847153317355072
20 | 38.816817537445637,56.877213186268506
21 | 66.189816606752601,83.878564664602763
22 | 65.41605174513407,118.59121730252249
23 | 47.48120860786787,57.251819462268969
24 | 41.57564261748702,51.391744079832307
25 | 51.84518690563943,75.380651665312357
26 | 59.370822011089523,74.765564032151374
27 | 57.31000343834809,95.455052922574737
28 | 63.615561251453308,95.229366017555307
29 | 46.737619407976972,79.052406169565586
30 | 50.556760148547767,83.432071421323712
31 | 52.223996085553047,63.358790317497878
32 | 35.567830047746632,41.412885303700563
33 | 42.436476944055642,76.617341280074044
34 | 58.16454011019286,96.769566426108199
35 | 57.504447615341789,74.084130116602523
36 | 45.440530725319981,66.588144414228594
37 | 61.89622268029126,77.768482417793024
38 | 33.093831736163963,50.719588912312084
39 | 36.436009511386871,62.124570818071781
40 | 37.675654860850742,60.810246649902211
41 | 44.555608383275356,52.682983366387781
42 | 43.318282631865721,58.569824717692867
43 | 50.073145632289034,82.905981485070512
44 | 43.870612645218372,61.424709804339123
45 | 62.997480747553091,115.24415280079529
46 | 32.669043763467187,45.570588823376085
47 | 40.166899008703702,54.084054796223612
48 | 53.575077531673656,87.994452758110413
49 | 33.864214971778239,52.725494375900425
50 | 64.707138666121296,93.576118692658241
51 | 38.119824026822805,80.166275447370964
52 | 44.502538064645101,65.101711570560326
53 | 40.599538384552318,65.562301260400375
54 | 41.720676356341293,65.280886920822823
55 | 51.088634678336796,73.434641546324301
56 | 55.078095904923202,71.13972785861894
57 | 41.377726534895203,79.102829683549857
58 | 62.494697427269791,86.520538440347153
59 | 49.203887540826003,84.742697807826218
60 | 41.102685187349664,59.358850248624933
61 | 41.182016105169822,61.684037524833627
62 | 50.186389494880601,69.847604158249183
63 | 52.378446219236217,86.098291205774103
64 | 50.135485486286122,59.108839267699643
65 | 33.644706006191782,69.89968164362763
66 | 39.557901222906828,44.862490711164398
67 | 56.130388816875467,85.498067778840223
68 | 57.362052133238237,95.536686846467219
69 | 60.269214393997906,70.251934419771587
70 | 35.678093889410732,52.721734964774988
71 | 31.588116998132829,50.392670135079896
72 | 53.66093226167304,63.642398775657753
73 | 46.682228649471917,72.247251068662365
74 | 43.107820219102464,57.812512976181402
75 | 70.34607561504933,104.25710158543822
76 | 44.492855880854073,86.642020318822006
77 | 57.50453330326841,91.486778000110135
78 | 36.930076609191808,55.231660886212836
79 | 55.805733357942742,79.550436678507609
80 | 38.954769073377065,44.847124242467601
81 | 56.901214702247074,80.207523139682763
82 | 56.868900661384046,83.14274979204346
83 | 34.33312470421609,55.723489260543914
84 | 59.04974121466681,77.634182511677864
85 | 57.788223993230673,99.051414841748269
86 | 54.282328705967409,79.120646274680027
87 | 51.088719898979143,69.588897851118475
88 | 50.282836348230731,69.510503311494389
89 | 44.211741752090113,73.687564318317285
90 | 38.005488008060688,61.366904537240131
91 | 32.940479942618296,67.170655768995118
92 | 53.691639571070056,85.668203145001542
93 | 68.76573426962166,114.85387123391394
94 | 46.230966498310252,90.123572069967423
95 | 68.319360818255362,97.919821035242848
96 | 50.030174340312143,81.536990783015028
97 | 49.239765342753763,72.111832469615663
98 | 50.039575939875988,85.232007342325673
99 | 48.149858891028863,66.224957888054632
100 | 25.128484647772304,53.454394214850524
101 |
--------------------------------------------------------------------------------
/Natural Language Generation/lstm_keras_generation.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Mon Jun 26 21:23:43 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | '''Example script to generate text from Nietzsche's writings.
10 | At least 20 epochs are required before the generated text
11 | starts sounding coherent.
12 | It is recommended to run this script on GPU, as recurrent
13 | networks are quite computationally intensive.
14 | If you try this script on new data, make sure your corpus
15 | has at least ~100k characters. ~1M is better.
16 | '''
17 |
18 | from __future__ import print_function
19 | from keras.models import Sequential
20 | from keras.layers import Dense, Activation
21 | from keras.layers import LSTM
22 | from keras.optimizers import RMSprop
23 | from keras.utils.data_utils import get_file
24 | import numpy as np
25 | import random
26 | import sys
27 |
28 | path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
29 | text = open(path).read().lower()
30 | print('corpus length:', len(text))
31 |
32 | chars = sorted(list(set(text)))
33 | print('total chars:', len(chars))
34 | char_indices = dict((c, i) for i, c in enumerate(chars))
35 | indices_char = dict((i, c) for i, c in enumerate(chars))
36 |
37 | # cut the text in semi-redundant sequences of maxlen characters
38 | maxlen = 40
39 | step = 3
40 | sentences = []
41 | next_chars = []
42 | for i in range(0, len(text) - maxlen, step):
43 | sentences.append(text[i: i + maxlen])
44 | next_chars.append(text[i + maxlen])
45 | print('nb sequences:', len(sentences))
46 |
47 | print('Vectorization...')
48 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
49 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
50 | for i, sentence in enumerate(sentences):
51 | for t, char in enumerate(sentence):
52 | X[i, t, char_indices[char]] = 1
53 | y[i, char_indices[next_chars[i]]] = 1
54 |
55 |
56 | # build the model: a single LSTM
57 | print('Build model...')
58 | model = Sequential()
59 | model.add(LSTM(128, input_shape=(maxlen, len(chars))))
60 | model.add(Dense(len(chars)))
61 | model.add(Activation('softmax'))
62 |
63 | optimizer = RMSprop(lr=0.01)
64 | model.compile(loss='categorical_crossentropy', optimizer=optimizer)
65 |
66 |
67 | def sample(preds, temperature=1.0):
68 | # helper function to sample an index from a probability array
69 | preds = np.asarray(preds).astype('float64')
70 | preds = np.log(preds) / temperature
71 | exp_preds = np.exp(preds)
72 | preds = exp_preds / np.sum(exp_preds)
73 | probas = np.random.multinomial(1, preds, 1)
74 | return np.argmax(probas)
75 |
76 | # train the model, output generated text after each iteration
77 | for iteration in range(1, 60):
78 | print()
79 | print('-' * 50)
80 | print('Iteration', iteration)
81 | model.fit(X, y,
82 | batch_size=128,
83 | epochs=1)
84 |
85 | start_index = random.randint(0, len(text) - maxlen - 1)
86 |
87 | for diversity in [0.2, 0.5, 1.0, 1.2]:
88 | print()
89 | print('----- diversity:', diversity)
90 |
91 | generated = ''
92 | sentence = text[start_index: start_index + maxlen]
93 | generated += sentence
94 | print('----- Generating with seed: "' + sentence + '"')
95 | sys.stdout.write(generated)
96 |
97 | for i in range(400):
98 | x = np.zeros((1, maxlen, len(chars)))
99 | for t, char in enumerate(sentence):
100 | x[0, t, char_indices[char]] = 1.
101 |
102 | preds = model.predict(x, verbose=0)[0]
103 | next_index = sample(preds, diversity)
104 | next_char = indices_char[next_index]
105 |
106 | generated += next_char
107 | sentence = sentence[1:] + next_char
108 |
109 | sys.stdout.write(next_char)
110 | sys.stdout.flush()
111 | print()
--------------------------------------------------------------------------------
/Python/.ipynb_checkpoints/Python_Review-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 7. 함수 이해하기"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {
14 | "collapsed": true
15 | },
16 | "outputs": [],
17 | "source": [
18 | "def func(pa1, pa2):\n",
19 | " \"\"\"함수란 이런것이다.\n",
20 | " \n",
21 | " \"\"\"\n",
22 | " pa1, pa2 = pa2, pa1\n",
23 | " \n",
24 | " return pa1, pa2"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 2,
30 | "metadata": {
31 | "collapsed": false,
32 | "scrolled": true
33 | },
34 | "outputs": [
35 | {
36 | "name": "stdout",
37 | "output_type": "stream",
38 | "text": [
39 | "Help on function func in module __main__:\n",
40 | "\n",
41 | "func(pa1, pa2)\n",
42 | " 함수란 이런것이다.\n",
43 | "\n"
44 | ]
45 | }
46 | ],
47 | "source": [
48 | "help(func)"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {
54 | "collapsed": true
55 | },
56 | "source": [
57 | "# 수정범위\n",
58 | "\n",
59 | "P6. 일급객체\n"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 3,
65 | "metadata": {
66 | "collapsed": false
67 | },
68 | "outputs": [
69 | {
70 | "name": "stdout",
71 | "output_type": "stream",
72 | "text": [
73 | "{'count': 0}\n",
74 | "call count 1\n",
75 | "20\n",
76 | "call count 2\n",
77 | "22\n",
78 | "{'count': 2}\n"
79 | ]
80 | }
81 | ],
82 | "source": [
83 | "def add(x,y):\n",
84 | " add.count += 1\n",
85 | " print(\"call count\" , add.count)\n",
86 | " return x+y\n",
87 | "\n",
88 | "add.count = 0\n",
89 | "print(add.__dict__)\n",
90 | "print(add(10,10))\n",
91 | "print(add(11,11))\n",
92 | "print(add.__dict__)"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 7,
98 | "metadata": {
99 | "collapsed": false
100 | },
101 | "outputs": [
102 | {
103 | "name": "stdout",
104 | "output_type": "stream",
105 | "text": [
106 | "\n",
107 | "\n"
108 | ]
109 | }
110 | ],
111 | "source": [
112 | "def add(x,y):\n",
113 | " return x+y\n",
114 | "\n",
115 | "print(globals()[\"add\"])\n",
116 | "print(add)"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 8,
122 | "metadata": {
123 | "collapsed": false
124 | },
125 | "outputs": [
126 | {
127 | "name": "stdout",
128 | "output_type": "stream",
129 | "text": [
130 | "10\n"
131 | ]
132 | }
133 | ],
134 | "source": [
135 | "def func(func, x, y):\n",
136 | " return func(x, y)\n",
137 | "\n",
138 | "print(func(add,5,5))"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 10,
144 | "metadata": {
145 | "collapsed": false
146 | },
147 | "outputs": [
148 | {
149 | "name": "stdout",
150 | "output_type": "stream",
151 | "text": [
152 | "dahl\n",
153 | "dahl\n"
154 | ]
155 | }
156 | ],
157 | "source": [
158 | "#함수에서 클레스 접근 예시\n",
159 | "class A:\n",
160 | " name = \"dahl\"\n",
161 | " \n",
162 | "def getName():\n",
163 | " return A.name\n",
164 | "\n",
165 | "print(getName())\n",
166 | "\n",
167 | "#함수에서 인스턴스 접근 예시\n",
168 | "#instance = 변수 in class\n",
169 | "class Person:\n",
170 | " def __init__(self,name):\n",
171 | " self.name = name\n",
172 | " \n",
173 | "def func(obj):\n",
174 | " return obj.name\n",
175 | "\n",
176 | "p = Person(\"dahl\")\n",
177 | "print(func(p))"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 13,
183 | "metadata": {
184 | "collapsed": false
185 | },
186 | "outputs": [
187 | {
188 | "name": "stdout",
189 | "output_type": "stream",
190 | "text": [
191 | "10\n",
192 | "20\n"
193 | ]
194 | }
195 | ],
196 | "source": [
197 | "#익명함수\n",
198 | "fn = lambda x : x\n",
199 | "print(fn(10))\n",
200 | "\n",
201 | "#익명 함수도 객체임\n",
202 | "\n",
203 | "#익명 함수에서 함수 적용\n",
204 | "fn1 = lambda x : add(x,x)\n",
205 | "print(fn1(10))\n",
206 | "\n",
207 | "#익명함수 내의 파라메터 초기값 처리\n",
208 | "x = 20\n",
209 | "lam = lambda x=x : list(x+n for n in range(3))"
210 | ]
211 | }
212 | ],
213 | "metadata": {
214 | "anaconda-cloud": {},
215 | "kernelspec": {
216 | "display_name": "Python [conda root]",
217 | "language": "python",
218 | "name": "conda-root-py"
219 | },
220 | "language_info": {
221 | "codemirror_mode": {
222 | "name": "ipython",
223 | "version": 3
224 | },
225 | "file_extension": ".py",
226 | "mimetype": "text/x-python",
227 | "name": "python",
228 | "nbconvert_exporter": "python",
229 | "pygments_lexer": "ipython3",
230 | "version": "3.5.2"
231 | }
232 | },
233 | "nbformat": 4,
234 | "nbformat_minor": 1
235 | }
236 |
--------------------------------------------------------------------------------
/Python/Cheat_Sheet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Jun 11 19:26:51 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | #-----------------Sklearn--------------------
10 | #1. Divide train and test data
11 | from sklearn.model_selection import train_test_split
12 | x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
13 |
14 | #---------------Keras----------------
15 |
16 | # Plot training accuracy from the History object returned by model.fit (history = model.fit(...))
17 | import matplotlib.pyplot as plt
18 | plt.plot(history.history['acc'], 'r')
19 | plt.xlabel('Epochs')
20 | plt.ylabel('acc')
21 | plt.show()
22 |
23 | #Save Model
24 | from keras.models import load_model
25 | model.save('domain_classify.h5')
26 |
27 | #Load Model
28 | my_model = load_model('domain_classify.h5')
29 |
30 | # Use the model (make sure the input has the same dimensionality as the training data)
31 | my_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
32 | my_model.predict_classes(np.array([sent]))  # sent: a preprocessed feature vector; numpy imported as np
33 |
34 |
35 |
--------------------------------------------------------------------------------
/Python/Decorator.py:
--------------------------------------------------------------------------------
1 | # November 20, Unit 44: Decorators
2 | # https://dojang.io/mod/page/view.php?id=1131
3 |
4 | # A decorator adds extra functionality without modifying the original function
5 |
6 | class Calc:
7 | @staticmethod
8 | def add(a,b):
9 | print(a,b)
10 |
11 | # A decorator that prints the start and end of a function call
12 | def trace(func):
13 | def wrapper():
14 | print(func.__name__, 'function start')
15 | func()
16 | print(func.__name__, 'function end')
17 | return wrapper
18 |
19 | @trace
20 | def hello():
21 | print('hello')
22 |
23 | @trace
24 | def world():
25 | print('world')
26 |
27 | # trace_hello = trace(hello) # pass the function to decorate into the decorator
28 | # trace_hello() # call the returned wrapper function
29 | # trace_world = trace(world)
30 | # trace_world()
31 |
32 | hello()
33 | world()
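# A small extension for reference (not in the original file): functools.wraps keeps the
# decorated function's own __name__ and docstring instead of exposing the wrapper's.
import functools

def trace2(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        print(func.__name__, 'function start')
        result = func(*args, **kwargs)
        print(func.__name__, 'function end')
        return result
    return wrapper

@trace2
def add(a, b):
    """Return a + b."""
    return a + b

print(add(1, 2))     # prints "add function start", "add function end", then 3
print(add.__name__)  # 'add', not 'wrapper', thanks to functools.wraps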
--------------------------------------------------------------------------------
/Python/Visualization/.ipynb_checkpoints/Bokeh-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 2
6 | }
7 |
--------------------------------------------------------------------------------
/Python/Visualization/Bokeh.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": []
11 | }
12 | ],
13 | "metadata": {
14 | "kernelspec": {
15 | "display_name": "Python 3",
16 | "language": "python",
17 | "name": "python3"
18 | },
19 | "language_info": {
20 | "codemirror_mode": {
21 | "name": "ipython",
22 | "version": 3
23 | },
24 | "file_extension": ".py",
25 | "mimetype": "text/x-python",
26 | "name": "python",
27 | "nbconvert_exporter": "python",
28 | "pygments_lexer": "ipython3",
29 | "version": "3.6.1"
30 | }
31 | },
32 | "nbformat": 4,
33 | "nbformat_minor": 2
34 | }
35 |
--------------------------------------------------------------------------------
/Python/attribute.py:
--------------------------------------------------------------------------------
1 | class Person:
2 | def __init__(self):
3 | self.hello = '안녕하세요.'
4 |
5 | def greeting(self):
6 | print(self.hello)
7 |
8 | james = Person()
9 | james.greeting() # prints '안녕하세요.' ("Hello.")
--------------------------------------------------------------------------------
/Quora_insincere/.gitignore:
--------------------------------------------------------------------------------
1 | /2_NLP_Study
2 | .DS_Store
3 | .ipynb_checkpoints/
4 | data_in/
5 | sh/
6 | input/
7 |
8 | # Byte-compiled / optimized / DLL files
9 | __pycache__/
10 | *.py[cod]
11 | *$py.class
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | build/
19 | develop-eggs/
20 | dist/
21 | downloads/
22 | eggs/
23 | .eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | wheels/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 | MANIFEST
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | .hypothesis/
55 | .pytest_cache/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 |
66 | # Flask stuff:
67 | instance/
68 | .webassets-cache
69 |
70 | # Scrapy stuff:
71 | .scrapy
72 |
73 | # Sphinx documentation
74 | docs/_build/
75 |
76 | # PyBuilder
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # celery beat schedule file
86 | celerybeat-schedule
87 |
88 | # SageMath parsed files
89 | *.sage.py
90 |
91 | # Environments
92 | .env
93 | .venv
94 | env/
95 | venv/
96 | ENV/
97 | env.bak/
98 | venv.bak/
99 |
100 | # Spyder project settings
101 | .spyderproject
102 | .spyproject
103 |
104 | # Rope project settings
105 | .ropeproject
106 |
107 | # mkdocs documentation
108 | /site
109 |
110 | # mypy
111 | .mypy_cache/
112 |
113 | .vscode
114 | .ipynb_checkpoints
115 |
116 | *.voc
117 | checkPoint
118 | *.log
119 |
120 | OLD/
--------------------------------------------------------------------------------
/Quora_insincere/README.md:
--------------------------------------------------------------------------------
1 | Kaggle
2 |
3 | https://www.kaggle.com/c/quora-insincere-questions-classification/data
--------------------------------------------------------------------------------
/Tensorflow/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### JetBrains template
3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
5 |
6 | # User-specific stuff:
7 | .idea/**/workspace.xml
8 | .idea/**/tasks.xml
9 | .idea/dictionaries
10 |
11 | # Sensitive or high-churn files:
12 | .idea/**/dataSources/
13 | .idea/**/dataSources.ids
14 | .idea/**/dataSources.xml
15 | .idea/**/dataSources.local.xml
16 | .idea/**/sqlDataSources.xml
17 | .idea/**/dynamic.xml
18 | .idea/**/uiDesigner.xml
19 |
20 | # Gradle:
21 | .idea/**/gradle.xml
22 | .idea/**/libraries
23 |
24 | # CMake
25 | cmake-build-debug/
26 |
27 | # Mongo Explorer plugin:
28 | .idea/**/mongoSettings.xml
29 |
30 | ## File-based project format:
31 | *.iws
32 |
33 | ## Plugin-specific files:
34 |
35 | # IntelliJ
36 | out/
37 |
38 | # mpeltonen/sbt-idea plugin
39 | .idea_modules/
40 |
41 | # JIRA plugin
42 | atlassian-ide-plugin.xml
43 |
44 | # Cursive Clojure plugin
45 | .idea/replstate.xml
46 |
47 | # Crashlytics plugin (for Android Studio and IntelliJ)
48 | com_crashlytics_export_strings.xml
49 | crashlytics.properties
50 | crashlytics-build.properties
51 | fabric.properties
52 | ### Python template
53 | # Byte-compiled / optimized / DLL files
54 | __pycache__/
55 | *.py[cod]
56 | *$py.class
57 |
58 | # C extensions
59 | *.so
60 |
61 | # Distribution / packaging
62 | .Python
63 | build/
64 | develop-eggs/
65 | dist/
66 | downloads/
67 | eggs/
68 | .eggs/
69 | lib/
70 | lib64/
71 | parts/
72 | sdist/
73 | var/
74 | wheels/
75 | *.egg-info/
76 | .installed.cfg
77 | *.egg
78 | MANIFEST
79 |
80 | # PyInstaller
81 | # Usually these files are written by a python script from a template
82 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
83 | *.manifest
84 | *.spec
85 |
86 | # Installer logs
87 | pip-log.txt
88 | pip-delete-this-directory.txt
89 |
90 | # Unit test / coverage reports
91 | htmlcov/
92 | .tox/
93 | .coverage
94 | .coverage.*
95 | .cache
96 | nosetests.xml
97 | coverage.xml
98 | *.cover
99 | .hypothesis/
100 |
101 | # Translations
102 | *.mo
103 | *.pot
104 |
105 | # Django stuff:
106 | *.log
107 | .static_storage/
108 | .media/
109 | local_settings.py
110 |
111 | # Flask stuff:
112 | instance/
113 | .webassets-cache
114 |
115 | # Scrapy stuff:
116 | .scrapy
117 |
118 | # Sphinx documentation
119 | docs/_build/
120 |
121 | # PyBuilder
122 | target/
123 |
124 | # Jupyter Notebook
125 | .ipynb_checkpoints
126 |
127 | # pyenv
128 | .python-version
129 |
130 | # celery beat schedule file
131 | celerybeat-schedule
132 |
133 | # SageMath parsed files
134 | *.sage.py
135 |
136 | # Environments
137 | .env
138 | .venv
139 | env/
140 | venv/
141 | ENV/
142 | env.bak/
143 | venv.bak/
144 |
145 | # Spyder project settings
146 | .spyderproject
147 | .spyproject
148 |
149 | # Rope project settings
150 | .ropeproject
151 |
152 | # mkdocs documentation
153 | /site
154 |
155 | # mypy
156 | .mypy_cache/
157 | ### macOS template
158 | # General
159 | .DS_Store
160 | .AppleDouble
161 | .LSOverride
162 |
163 | # Icon must end with two \r
164 | Icon
165 |
166 | # Thumbnails
167 | ._*
168 |
169 | # Files that might appear in the root of a volume
170 | .DocumentRevisions-V100
171 | .fseventsd
172 | .Spotlight-V100
173 | .TemporaryItems
174 | .Trashes
175 | .VolumeIcon.icns
176 | .com.apple.timemachine.donotpresent
177 |
178 | # Directories potentially created on remote AFP share
179 | .AppleDB
180 | .AppleDesktop
181 | Network Trash Folder
182 | Temporary Items
183 | .apdisk
184 |
185 | .idea/
186 | data_out/*
187 | checkpoint/
188 | logs/
189 | OLD/
190 | practice/
191 | scala_data_pre/
192 | target/
193 | .vscode/
194 | .ipynb_checkpoints/
195 | .DS_Store
196 | .DS_Store*
197 | my_test_model/
198 | result/
199 | sh/
--------------------------------------------------------------------------------
/Tensorflow/04_word2vec_eager.py:
--------------------------------------------------------------------------------
1 | """ starter code for word2vec skip-gram model with NCE loss
2 | Eager execution
3 | CS 20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | Chip Huyen (chiphuyen@cs.stanford.edu) & Akshay Agrawal (akshayka@cs.stanford.edu)
6 | Lecture 04
7 | """
8 |
9 | import os
10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
11 |
12 | import numpy as np
13 | import tensorflow as tf
14 | import tensorflow.contrib.eager as tfe
15 |
16 | import utils
17 | import word2vec_utils
18 |
19 | tfe.enable_eager_execution()
20 |
21 | # Model hyperparameters
22 | VOCAB_SIZE = 50000
23 | BATCH_SIZE = 128
24 | EMBED_SIZE = 128 # dimension of the word embedding vectors
25 | SKIP_WINDOW = 1 # the context window
26 | NUM_SAMPLED = 64 # number of negative examples to sample
27 | LEARNING_RATE = 1.0
28 | NUM_TRAIN_STEPS = 100000
29 | VISUAL_FLD = 'visualization'
30 | SKIP_STEP = 5000
31 |
32 | # Parameters for downloading data
33 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip'
34 | EXPECTED_BYTES = 31344016
35 |
36 | class Word2Vec(object):
37 | def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED):
38 | self.vocab_size = vocab_size
39 | self.num_sampled = num_sampled
40 | self.embed_matrix = tfe.Variable(tf.random_uniform(
41 | [vocab_size, embed_size]))
42 | self.nce_weight = tfe.Variable(tf.truncated_normal(
43 | [vocab_size, embed_size],
44 | stddev=1.0 / (embed_size ** 0.5)))
45 | self.nce_bias = tfe.Variable(tf.zeros([vocab_size]))
46 |
47 | def compute_loss(self, center_words, target_words):
48 | """Computes the forward pass of word2vec with the NCE loss."""
49 | embed = tf.nn.embedding_lookup(self.embed_matrix, center_words)
50 | loss = tf.reduce_mean(tf.nn.nce_loss(weights=self.nce_weight,
51 | biases=self.nce_bias,
52 | labels=target_words,
53 | inputs=embed,
54 | num_sampled=self.num_sampled,
55 | num_classes=self.vocab_size))
56 | return loss
57 |
58 |
59 | def gen():
60 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES,
61 | VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW,
62 | VISUAL_FLD)
63 |
64 | def main():
65 | dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32),
66 | (tf.TensorShape([BATCH_SIZE]),
67 | tf.TensorShape([BATCH_SIZE, 1])))
68 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
69 | model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE)
70 | grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)
71 | total_loss = 0.0 # for average loss in the last SKIP_STEP steps
72 | num_train_steps = 0
73 | while num_train_steps < NUM_TRAIN_STEPS:
74 | for center_words, target_words in tfe.Iterator(dataset):
75 | if num_train_steps >= NUM_TRAIN_STEPS:
76 | break
77 | loss_batch, grads = grad_fn(center_words, target_words)
78 | total_loss += loss_batch
79 | optimizer.apply_gradients(grads)
80 | if (num_train_steps + 1) % SKIP_STEP == 0:
81 | print('Average loss at step {}: {:5.1f}'.format(
82 | num_train_steps, total_loss / SKIP_STEP))
83 | total_loss = 0.0
84 | num_train_steps += 1
85 |
86 |
87 | if __name__ == '__main__':
88 | main()
89 |
--------------------------------------------------------------------------------
/Tensorflow/TF_README.md:
--------------------------------------------------------------------------------
1 | # Deep Learning NLP Tutorial from Scratch
2 |
3 | *Feedback: sungjin7127@gmail.com*
4 |
5 | # *To-Do*
6 |
7 | ## Tensorflow Tutorial (1.4 or Later)
8 |
9 | * [Conversation-Tensorflow](https://github.com/DoungjunLee/conversation-tensorflow)
10 | * [Hvass Tensorflow Tutorial](https://github.com/Hvass-Labs/TensorFlow-Tutorials)
11 |
--------------------------------------------------------------------------------
/Tensorflow/standford_example/02_lazy_loading.py:
--------------------------------------------------------------------------------
1 | """ Example of lazy vs normal loading
2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu)
3 | CS20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | Lecture 02
6 | """
7 | import os
8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
9 |
10 | import tensorflow as tf
11 |
12 | ########################################
13 | ## NORMAL LOADING ##
14 | ## print out a graph with 1 Add node ##
15 | ########################################
16 |
17 | x = tf.Variable(10, name='x')
18 | y = tf.Variable(20, name='y')
19 | z = tf.add(x, y)
20 |
21 | with tf.Session() as sess:
22 | sess.run(tf.global_variables_initializer())
23 | writer = tf.summary.FileWriter('graphs/normal_loading', sess.graph)
24 | for _ in range(10):
25 | sess.run(z)
26 | print(tf.get_default_graph().as_graph_def())
27 | writer.close()
28 |
29 | ########################################
30 | ## LAZY LOADING ##
31 | ## print out a graph with 10 Add nodes##
32 | ########################################
33 |
34 | x = tf.Variable(10, name='x')
35 | y = tf.Variable(20, name='y')
36 |
37 | with tf.Session() as sess:
38 | sess.run(tf.global_variables_initializer())
39 | writer = tf.summary.FileWriter('graphs/lazy_loading', sess.graph)
40 | for _ in range(10):
41 | sess.run(tf.add(x, y))
42 | print(tf.get_default_graph().as_graph_def())
43 | writer.close()
--------------------------------------------------------------------------------
/Tensorflow/standford_example/02_placeholder.py:
--------------------------------------------------------------------------------
1 | """ Placeholder and feed_dict example
2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu)
3 | CS20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | Lecture 02
6 | """
7 | import os
8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
9 |
10 | import tensorflow as tf
11 |
12 | # Example 1: feed_dict with placeholder
13 |
14 | # a is a placeholder for a vector of 3 elements, type tf.float32
15 | a = tf.placeholder(tf.float32, shape=[3])
16 | b = tf.constant([5, 5, 5], tf.float32)
17 |
18 | # use the placeholder as you would a constant
19 | c = a + b # short for tf.add(a, b)
20 |
21 | writer = tf.summary.FileWriter('graphs/placeholders', tf.get_default_graph())
22 | with tf.Session() as sess:
23 | # compute the value of c given the value of a is [1, 2, 3]
24 | print(sess.run(c, {a: [1, 2, 3]})) # [6. 7. 8.]
25 | writer.close()
26 |
27 |
28 | # Example 2: feed_dict with variables
29 | a = tf.add(2, 5)
30 | b = tf.multiply(a, 3)
31 |
32 | with tf.Session() as sess:
33 | print(sess.run(b)) # >> 21
34 | # compute the value of b given the value of a is 15
35 | print(sess.run(b, feed_dict={a: 15})) # >> 45
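
A small companion sketch (not in the lecture file): leaving a dimension as `None` lets the same placeholder accept batches of different sizes at run time.

```python
import tensorflow as tf

# shape=[None, 3]: any number of rows, each with 3 elements
x = tf.placeholder(tf.float32, shape=[None, 3], name='x_batch')
doubled = x * 2

with tf.Session() as sess:
    print(sess.run(doubled, {x: [[1., 2., 3.]]}))                # one row
    print(sess.run(doubled, {x: [[1., 2., 3.], [4., 5., 6.]]}))  # two rows
```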
--------------------------------------------------------------------------------
/Tensorflow/standford_example/02_simple_tf.py:
--------------------------------------------------------------------------------
1 | """ Simple TensorFlow's ops
2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu)
3 | CS20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | """
6 | import os
7 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
8 |
9 | import numpy as np
10 | import tensorflow as tf
11 |
12 | # Example 1: Simple ways to create log file writer
13 | a = tf.constant(2, name='a')
14 | b = tf.constant(3, name='b')
15 | x = tf.add(a, b, name='add')
16 | writer = tf.summary.FileWriter('./graphs/simple', tf.get_default_graph())
17 | with tf.Session() as sess:
18 | # writer = tf.summary.FileWriter('./graphs', sess.graph)
19 | print(sess.run(x))
20 | writer.close() # close the writer when you’re done using it
21 |
22 | # Example 2: The wonderful wizard of div
23 | a = tf.constant([2, 2], name='a')
24 | b = tf.constant([[0, 1], [2, 3]], name='b')
25 |
26 | with tf.Session() as sess:
27 | print(sess.run(tf.div(b, a)))
28 | print(sess.run(tf.divide(b, a)))
29 | print(sess.run(tf.truediv(b, a)))
30 | print(sess.run(tf.floordiv(b, a)))
31 | # print(sess.run(tf.realdiv(b, a)))
32 | print(sess.run(tf.truncatediv(b, a)))
33 | print(sess.run(tf.floor_div(b, a)))
34 |
35 | # Example 3: multiplying tensors
36 | a = tf.constant([10, 20], name='a')
37 | b = tf.constant([2, 3], name='b')
38 |
39 | with tf.Session() as sess:
40 | print(sess.run(tf.multiply(a, b)))
41 | print(sess.run(tf.tensordot(a, b, 1)))
42 |
43 | # Example 4: Python native type
44 | t_0 = 19
45 | x = tf.zeros_like(t_0) # ==> 0
46 | y = tf.ones_like(t_0) # ==> 1
47 |
48 | t_1 = ['apple', 'peach', 'banana']
49 | x = tf.zeros_like(t_1) # ==> ['' '' '']
50 | # y = tf.ones_like(t_1) # ==> TypeError: Expected string, got 1 of type 'int' instead.
51 |
52 | t_2 = [[True, False, False],
53 | [False, False, True],
54 | [False, True, False]]
55 | x = tf.zeros_like(t_2) # ==> 3x3 tensor, all elements are False
56 | y = tf.ones_like(t_2) # ==> 3x3 tensor, all elements are True
57 |
58 | print(tf.int32.as_numpy_dtype())
59 |
60 | # Example 5: printing your graph's definition
61 | my_const = tf.constant([1.0, 2.0], name='my_const')
62 | print(tf.get_default_graph().as_graph_def())
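
For quick reference, the division ops in Example 2 split into an integer-style family and a true-division family. A small sketch of the outputs one would expect on TF 1.x for the same `a` and `b`:

```python
import tensorflow as tf

a = tf.constant([2, 2], name='a')
b = tf.constant([[0, 1], [2, 3]], name='b')

with tf.Session() as sess:
    print(sess.run(tf.div(b, a)))       # integer-style division: [[0 0] [1 1]]
    print(sess.run(tf.truediv(b, a)))   # true division:          [[0.  0.5] [1.  1.5]]
    print(sess.run(tf.floordiv(b, a)))  # floor division (int):   [[0 0] [1 1]]
```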
--------------------------------------------------------------------------------
/Tensorflow/standford_example/02_variables.py:
--------------------------------------------------------------------------------
1 | """ Variable examples
2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu)
3 | CS20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | Lecture 02
6 | """
7 | import os
8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
9 |
10 | import numpy as np
11 | import tensorflow as tf
12 |
13 | # Example 1: creating variables
14 | s = tf.Variable(2, name='scalar')
15 | m = tf.Variable([[0, 1], [2, 3]], name='matrix')
16 | W = tf.Variable(tf.zeros([784,10]), name='big_matrix')
17 | V = tf.Variable(tf.truncated_normal([784, 10]), name='normal_matrix')
18 |
19 | s = tf.get_variable('scalar', initializer=tf.constant(2))
20 | m = tf.get_variable('matrix', initializer=tf.constant([[0, 1], [2, 3]]))
21 | W = tf.get_variable('big_matrix', shape=(784, 10), initializer=tf.zeros_initializer())
22 | V = tf.get_variable('normal_matrix', shape=(784, 10), initializer=tf.truncated_normal_initializer())
23 |
24 | with tf.Session() as sess:
25 | sess.run(tf.global_variables_initializer())
26 | print(V.eval())
27 |
28 | # Example 2: assigning values to variables
29 | W = tf.Variable(10)
30 | W.assign(100)
31 | with tf.Session() as sess:
32 | sess.run(W.initializer)
33 | print(sess.run(W)) # >> 10
34 |
35 | W = tf.Variable(10)
36 | assign_op = W.assign(100)
37 | with tf.Session() as sess:
38 | sess.run(assign_op)
39 | print(W.eval()) # >> 100
40 |
41 | # create a variable whose original value is 2
42 | a = tf.get_variable('scalar', initializer=tf.constant(2))
43 | a_times_two = a.assign(a * 2)
44 | with tf.Session() as sess:
45 | sess.run(tf.global_variables_initializer())
46 | sess.run(a_times_two) # >> 4
47 | sess.run(a_times_two) # >> 8
48 | sess.run(a_times_two) # >> 16
49 |
50 | W = tf.Variable(10)
51 | with tf.Session() as sess:
52 | sess.run(W.initializer)
53 | print(sess.run(W.assign_add(10))) # >> 20
54 | print(sess.run(W.assign_sub(2))) # >> 18
55 |
56 | # Example 3: Each session has its own copy of variable
57 | W = tf.Variable(10)
58 | sess1 = tf.Session()
59 | sess2 = tf.Session()
60 | sess1.run(W.initializer)
61 | sess2.run(W.initializer)
62 | print(sess1.run(W.assign_add(10))) # >> 20
63 | print(sess2.run(W.assign_sub(2))) # >> 8
64 | print(sess1.run(W.assign_add(100))) # >> 120
65 | print(sess2.run(W.assign_sub(50))) # >> -42
66 | sess1.close()
67 | sess2.close()
68 |
69 | # Example 4: create a variable with the initial value depending on another variable
70 | W = tf.Variable(tf.truncated_normal([700, 10]))
71 | U = tf.Variable(W * 2)
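
Example 4 works because `W * 2` is evaluated when `U` is initialized, but a safer TF 1.x idiom when one variable's initial value depends on another is `initialized_value()`. A brief sketch:

```python
import tensorflow as tf

W = tf.Variable(tf.truncated_normal([700, 10]))
# initialized_value() guarantees W is initialized before U's initial value is computed
U = tf.Variable(W.initialized_value() * 2)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(U).shape)  # (700, 10)
```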
--------------------------------------------------------------------------------
/Tensorflow/standford_example/03_linreg_dataset.py:
--------------------------------------------------------------------------------
1 | """ Solution for simple linear regression example using tf.data
2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu)
3 | CS20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | Lecture 03
6 | """
7 | import os
8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
9 | import time
10 |
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | import tensorflow as tf
14 |
15 | import utils
16 |
17 | DATA_FILE = 'data/birth_life_2010.txt'
18 |
19 | # Step 1: read in the data
20 | data, n_samples = utils.read_birth_life_data(DATA_FILE)
21 |
22 | # Step 2: create Dataset and iterator
23 | dataset = tf.data.Dataset.from_tensor_slices((data[:,0], data[:,1]))
24 |
25 | iterator = dataset.make_initializable_iterator()
26 | X, Y = iterator.get_next()
27 |
28 | # Step 3: create weight and bias, initialized to 0
29 | w = tf.get_variable('weights', initializer=tf.constant(0.0))
30 | b = tf.get_variable('bias', initializer=tf.constant(0.0))
31 |
32 | # Step 4: build model to predict Y
33 | Y_predicted = X * w + b
34 |
35 | # Step 5: use the square error as the loss function
36 | loss = tf.square(Y - Y_predicted, name='loss')
37 | # loss = utils.huber_loss(Y, Y_predicted)
38 |
39 | # Step 6: using gradient descent with learning rate of 0.001 to minimize loss
40 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)
41 |
42 | start = time.time()
43 | with tf.Session() as sess:
44 | # Step 7: initialize the necessary variables, in this case, w and b
45 | sess.run(tf.global_variables_initializer())
46 | writer = tf.summary.FileWriter('./graphs/linear_reg', sess.graph)
47 |
48 | # Step 8: train the model for 100 epochs
49 | for i in range(100):
50 | sess.run(iterator.initializer) # initialize the iterator
51 | total_loss = 0
52 | try:
53 | while True:
54 | _, l = sess.run([optimizer, loss])
55 | total_loss += l
56 | except tf.errors.OutOfRangeError:
57 | pass
58 |
59 | print('Epoch {0}: {1}'.format(i, total_loss/n_samples))
60 |
61 | # close the writer when you're done using it
62 | writer.close()
63 |
64 | # Step 9: output the values of w and b
65 | w_out, b_out = sess.run([w, b])
66 | print('w: %f, b: %f' %(w_out, b_out))
67 | print('Took: %f seconds' %(time.time() - start))
68 |
69 | # plot the results
70 | plt.plot(data[:,0], data[:,1], 'bo', label='Real data')
71 | plt.plot(data[:,0], data[:,0] * w_out + b_out, 'r', label='Predicted data with squared error')
72 | # plt.plot(data[:,0], data[:,0] * (-5.883589) + 85.124306, 'g', label='Predicted data with Huber loss')
73 | plt.legend()
74 | plt.show()
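
A variation worth noting: the Dataset above yields one (X, Y) pair per step, but shuffling and batching are one-line additions and usually speed up training. A sketch, assuming the same `utils.read_birth_life_data` helper is on the path:

```python
import tensorflow as tf
import utils  # course helper module, assumed available as in the file above

data, n_samples = utils.read_birth_life_data('data/birth_life_2010.txt')
dataset = tf.data.Dataset.from_tensor_slices((data[:, 0], data[:, 1]))
dataset = dataset.shuffle(buffer_size=200).batch(32)  # shuffle, then batch 32 samples

iterator = dataset.make_initializable_iterator()
X, Y = iterator.get_next()  # X and Y are now rank-1 tensors of up to 32 values each
```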
--------------------------------------------------------------------------------
/Tensorflow/standford_example/03_linreg_placeholder.py:
--------------------------------------------------------------------------------
1 | """ Solution for simple linear regression example using placeholders
2 | Created by Chip Huyen (chiphuyen@cs.stanford.edu)
3 | CS20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | Lecture 03
6 | """
7 | import os
8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
9 | import time
10 |
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | import tensorflow as tf
14 |
15 | import utils
16 |
17 | DATA_FILE = 'data/birth_life_2010.txt'
18 |
19 | # Step 1: read in data from the .txt file
20 | data, n_samples = utils.read_birth_life_data(DATA_FILE)
21 |
22 | # Step 2: create placeholders for X (birth rate) and Y (life expectancy)
23 | X = tf.placeholder(tf.float32, name='X')
24 | Y = tf.placeholder(tf.float32, name='Y')
25 |
26 | # Step 3: create weight and bias, initialized to 0
27 | w = tf.get_variable('weights', initializer=tf.constant(0.0))
28 | b = tf.get_variable('bias', initializer=tf.constant(0.0))
29 |
30 | # Step 4: build model to predict Y
31 | Y_predicted = w * X + b
32 |
33 | # Step 5: use the squared error as the loss function
34 | # you can use either mean squared error or Huber loss
35 | loss = tf.square(Y - Y_predicted, name='loss')
36 | # loss = utils.huber_loss(Y, Y_predicted)
37 |
38 | # Step 6: using gradient descent with learning rate of 0.001 to minimize loss
39 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)
40 |
41 |
42 | start = time.time()
43 | writer = tf.summary.FileWriter('./graphs/linear_reg', tf.get_default_graph())
44 | with tf.Session() as sess:
45 | # Step 7: initialize the necessary variables, in this case, w and b
46 | sess.run(tf.global_variables_initializer())
47 |
48 | # Step 8: train the model for 100 epochs
49 | for i in range(100):
50 | total_loss = 0
51 | for x, y in data:
52 | # Session execute optimizer and fetch values of loss
53 | _, l = sess.run([optimizer, loss], feed_dict={X: x, Y:y})
54 | total_loss += l
55 | print('Epoch {0}: {1}'.format(i, total_loss/n_samples))
56 |
57 | # close the writer when you're done using it
58 | writer.close()
59 |
60 | # Step 9: output the values of w and b
61 | w_out, b_out = sess.run([w, b])
62 |
63 | print('Took: %f seconds' %(time.time() - start))
64 |
65 | # plot the results
66 | plt.plot(data[:,0], data[:,1], 'bo', label='Real data')
67 | plt.plot(data[:,0], data[:,0] * w_out + b_out, 'r', label='Predicted data')
68 | plt.legend()
69 | plt.show()
--------------------------------------------------------------------------------
/Tensorflow/standford_example/03_linreg_starter.py:
--------------------------------------------------------------------------------
1 | """ Starter code for simple linear regression example using placeholders
2 | Created by Chip Huyen (huyenn@cs.stanford.edu)
3 | CS20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | Lecture 03
6 | """
7 | import os
8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
9 | import time
10 |
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | import tensorflow as tf
14 |
15 | import utils
16 |
17 | DATA_FILE = 'data/birth_life_2010.txt'
18 |
19 | # Step 1: read in data from the .txt file
20 | data, n_samples = utils.read_birth_life_data(DATA_FILE)
21 |
22 | # Step 2: create placeholders for X (birth rate) and Y (life expectancy)
23 | # Remember both X and Y are scalars with type float
24 | X, Y = None, None
25 | #############################
26 | ########## TO DO ############
27 | #############################
28 |
29 | # Step 3: create weight and bias, initialized to 0.0
30 | # Make sure to use tf.get_variable
31 | w, b = None, None
32 | #############################
33 | ########## TO DO ############
34 | #############################
35 |
36 | # Step 4: build model to predict Y
37 | # e.g. how would you derive Y_predicted given X, w, and b
38 | Y_predicted = None
39 | #############################
40 | ########## TO DO ############
41 | #############################
42 |
43 | # Step 5: use the square error as the loss function
44 | loss = None
45 | #############################
46 | ########## TO DO ############
47 | #############################
48 |
49 | # Step 6: using gradient descent with learning rate of 0.001 to minimize loss
50 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)
51 |
52 | start = time.time()
53 |
54 | # Create a filewriter to write the model's graph to TensorBoard
55 | #############################
56 | ########## TO DO ############
57 | #############################
58 |
59 | with tf.Session() as sess:
60 | # Step 7: initialize the necessary variables, in this case, w and b
61 | #############################
62 | ########## TO DO ############
63 | #############################
64 |
65 | # Step 8: train the model for 100 epochs
66 | for i in range(100):
67 | total_loss = 0
68 | for x, y in data:
69 | # Execute train_op and get the value of loss.
70 | # Don't forget to feed in data for placeholders
71 | _, loss = ########## TO DO ############
72 | total_loss += loss
73 |
74 | print('Epoch {0}: {1}'.format(i, total_loss/n_samples))
75 |
76 | # close the writer when you're done using it
77 | #############################
78 | ########## TO DO ############
79 | #############################
80 | writer.close()
81 |
82 | # Step 9: output the values of w and b
83 | w_out, b_out = None, None
84 | #############################
85 | ########## TO DO ############
86 | #############################
87 |
88 | print('Took: %f seconds' %(time.time() - start))
89 |
90 | # uncomment the following lines to see the plot
91 | # plt.plot(data[:,0], data[:,1], 'bo', label='Real data')
92 | # plt.plot(data[:,0], data[:,0] * w_out + b_out, 'r', label='Predicted data')
93 | # plt.legend()
94 | # plt.show()
--------------------------------------------------------------------------------
/Tensorflow/standford_example/03_logreg.py:
--------------------------------------------------------------------------------
1 | """ Solution for simple logistic regression model for MNIST
2 | with tf.data module
3 | MNIST dataset: yann.lecun.com/exdb/mnist/
4 | Created by Chip Huyen (chiphuyen@cs.stanford.edu)
5 | CS20: "TensorFlow for Deep Learning Research"
6 | cs20.stanford.edu
7 | Lecture 03
8 | """
9 | import os
10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
11 |
12 | import numpy as np
13 | import tensorflow as tf
14 | import time
15 |
16 | import utils
17 |
18 | # Define parameters for the model
19 | learning_rate = 0.01
20 | batch_size = 128
21 | n_epochs = 30
22 | n_train = 60000
23 | n_test = 10000
24 |
25 | # Step 1: Read in data
26 | mnist_folder = 'data/mnist'
27 | utils.download_mnist(mnist_folder)
28 | train, val, test = utils.read_mnist(mnist_folder, flatten=True)
29 |
30 | # Step 2: Create datasets and iterator
31 | train_data = tf.data.Dataset.from_tensor_slices(train)
32 | train_data = train_data.shuffle(10000) # if you want to shuffle your data
33 | train_data = train_data.batch(batch_size)
34 |
35 | test_data = tf.data.Dataset.from_tensor_slices(test)
36 | test_data = test_data.batch(batch_size)
37 |
38 | iterator = tf.data.Iterator.from_structure(train_data.output_types,
39 | train_data.output_shapes)
40 | img, label = iterator.get_next()
41 |
42 | train_init = iterator.make_initializer(train_data) # initializer for train_data
43 | test_init = iterator.make_initializer(test_data) # initializer for test_data
44 |
45 | # Step 3: create weights and bias
46 | # w is initialized to random values with mean of 0, stddev of 0.01
47 | # b is initialized to 0
48 | # shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w)
49 | # shape of b depends on Y
50 | w = tf.get_variable(name='weights', shape=(784, 10), initializer=tf.random_normal_initializer(0, 0.01))
51 | b = tf.get_variable(name='bias', shape=(1, 10), initializer=tf.zeros_initializer())
52 |
53 | # Step 4: build model
54 | # the model that returns the logits.
55 | # these logits will later be passed through a softmax layer
56 | logits = tf.matmul(img, w) + b
57 |
58 | # Step 5: define loss function
59 | # use cross entropy of softmax of logits as the loss function
60 | entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=label, name='entropy')
61 | loss = tf.reduce_mean(entropy, name='loss') # computes the mean over all the examples in the batch
62 |
63 | # Step 6: define training op
64 | # using Adam optimizer with learning rate of 0.01 to minimize loss
65 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
66 |
67 | # Step 7: calculate accuracy with test set
68 | preds = tf.nn.softmax(logits)
69 | correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(label, 1))
70 | accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
71 |
72 | writer = tf.summary.FileWriter('./graphs/logreg', tf.get_default_graph())
73 | with tf.Session() as sess:
74 |
75 | start_time = time.time()
76 | sess.run(tf.global_variables_initializer())
77 |
78 | # train the model n_epochs times
79 | for i in range(n_epochs):
80 | sess.run(train_init) # drawing samples from train_data
81 | total_loss = 0
82 | n_batches = 0
83 | try:
84 | while True:
85 | _, l = sess.run([optimizer, loss])
86 | total_loss += l
87 | n_batches += 1
88 | except tf.errors.OutOfRangeError:
89 | pass
90 | print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))
91 | print('Total time: {0} seconds'.format(time.time() - start_time))
92 |
93 | # test the model
94 | sess.run(test_init) # drawing samples from test_data
95 | total_correct_preds = 0
96 | try:
97 | while True:
98 | accuracy_batch = sess.run(accuracy)
99 | total_correct_preds += accuracy_batch
100 | except tf.errors.OutOfRangeError:
101 | pass
102 |
103 | print('Accuracy {0}'.format(total_correct_preds/n_test))
104 | writer.close()
105 |
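
The switch between training and test data above relies on tf.data's reinitializable-iterator pattern: one iterator defined by structure, one initializer op per dataset. A minimal standalone sketch of just that mechanism (toy arrays, not MNIST):

```python
import numpy as np
import tensorflow as tf

train = tf.data.Dataset.from_tensor_slices(np.arange(6)).batch(2)
test = tf.data.Dataset.from_tensor_slices(np.arange(100, 103)).batch(2)

iterator = tf.data.Iterator.from_structure(train.output_types, train.output_shapes)
batch = iterator.get_next()
train_init = iterator.make_initializer(train)
test_init = iterator.make_initializer(test)

with tf.Session() as sess:
    for init in (train_init, test_init):
        sess.run(init)  # point the iterator at the next dataset
        try:
            while True:
                print(sess.run(batch))
        except tf.errors.OutOfRangeError:
            pass  # dataset exhausted; move on
```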
--------------------------------------------------------------------------------
/Tensorflow/standford_example/03_logreg_placeholder.py:
--------------------------------------------------------------------------------
1 | """ Solution for simple logistic regression model for MNIST
2 | with placeholder
3 | MNIST dataset: yann.lecun.com/exdb/mnist/
4 | Created by Chip Huyen (huyenn@cs.stanford.edu)
5 | CS20: "TensorFlow for Deep Learning Research"
6 | cs20.stanford.edu
7 | Lecture 03
8 | """
9 | import os
10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
11 |
12 | import numpy as np
13 | import tensorflow as tf
14 | from tensorflow.examples.tutorials.mnist import input_data
15 | import time
16 |
17 | import utils
18 |
19 | # Define parameters for the model
20 | learning_rate = 0.01
21 | batch_size = 128
22 | n_epochs = 30
23 |
24 | # Step 1: Read in data
25 | # using TF Learn's built in function to load MNIST data to the folder data/mnist
26 | mnist = input_data.read_data_sets('data/mnist', one_hot=True)
27 | X_batch, Y_batch = mnist.train.next_batch(batch_size)
28 |
29 | # Step 2: create placeholders for features and labels
30 | # each image in the MNIST data is of shape 28*28 = 784
31 | # therefore, each image is represented with a 1x784 tensor
32 | # there are 10 classes for each image, corresponding to digits 0 - 9.
33 | # each label is a one-hot vector.
34 | X = tf.placeholder(tf.float32, [batch_size, 784], name='image')
35 | Y = tf.placeholder(tf.int32, [batch_size, 10], name='label')
36 |
37 | # Step 3: create weights and bias
38 | # w is initialized to random values from a normal distribution
39 | # b is initialized to 0
40 | # shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w)
41 | # shape of b depends on Y
42 | w = tf.get_variable(name='weights', shape=(784, 10), initializer=tf.random_normal_initializer())
43 | b = tf.get_variable(name='bias', shape=(1, 10), initializer=tf.zeros_initializer())
44 |
45 | # Step 4: build model
46 | # the model that returns the logits.
47 | # these logits will later be passed through a softmax layer
48 | logits = tf.matmul(X, w) + b
49 |
50 | # Step 5: define loss function
51 | # use cross entropy of softmax of logits as the loss function
52 | entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y, name='loss')
53 | loss = tf.reduce_mean(entropy) # computes the mean over all the examples in the batch
54 | # loss = tf.reduce_mean(-tf.reduce_sum(tf.nn.softmax(logits) * tf.log(Y), reduction_indices=[1]))
55 |
56 | # Step 6: define training op
57 | # using Adam optimizer with learning rate of 0.01 to minimize loss
58 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
59 |
60 | # Step 7: calculate accuracy with test set
61 | preds = tf.nn.softmax(logits)
62 | correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y, 1))
63 | accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
64 |
65 | writer = tf.summary.FileWriter('./graphs/logreg_placeholder', tf.get_default_graph())
66 | with tf.Session() as sess:
67 | start_time = time.time()
68 | sess.run(tf.global_variables_initializer())
69 | n_batches = int(mnist.train.num_examples/batch_size)
70 |
71 | # train the model n_epochs times
72 | for i in range(n_epochs):
73 | total_loss = 0
74 |
75 | for j in range(n_batches):
76 | X_batch, Y_batch = mnist.train.next_batch(batch_size)
77 | _, loss_batch = sess.run([optimizer, loss], {X: X_batch, Y:Y_batch})
78 | total_loss += loss_batch
79 | print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))
80 | print('Total time: {0} seconds'.format(time.time() - start_time))
81 |
82 | # test the model
83 | n_batches = int(mnist.test.num_examples/batch_size)
84 | total_correct_preds = 0
85 |
86 | for i in range(n_batches):
87 | X_batch, Y_batch = mnist.test.next_batch(batch_size)
88 | accuracy_batch = sess.run(accuracy, {X: X_batch, Y:Y_batch})
89 | total_correct_preds += accuracy_batch
90 |
91 | print('Accuracy {0}'.format(total_correct_preds/mnist.test.num_examples))
92 |
93 | writer.close()
94 |
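
One caveat when running this on newer TF 1.x releases: `tf.nn.softmax_cross_entropy_with_logits` is deprecated in favour of the `_v2` variant, which also back-propagates into the labels unless they are wrapped in `tf.stop_gradient`. A drop-in sketch for Step 5, assuming the same `logits` and `Y` as above (the loss value is unchanged here because `Y` is a placeholder):

```python
import tensorflow as tf

# Step 5 (alternative): the _v2 op
entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y, name='loss')
loss = tf.reduce_mean(entropy)
```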
--------------------------------------------------------------------------------
/Tensorflow/standford_example/03_logreg_starter.py:
--------------------------------------------------------------------------------
1 | """ Starter code for simple logistic regression model for MNIST
2 | with tf.data module
3 | MNIST dataset: yann.lecun.com/exdb/mnist/
4 | Created by Chip Huyen (chiphuyen@cs.stanford.edu)
5 | CS20: "TensorFlow for Deep Learning Research"
6 | cs20.stanford.edu
7 | Lecture 03
8 | """
9 | import os
10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
11 |
12 | import numpy as np
13 | import tensorflow as tf
14 | import time
15 |
16 | import utils
17 |
18 | # Define parameters for the model
19 | learning_rate = 0.01
20 | batch_size = 128
21 | n_epochs = 30
22 | n_train = 60000
23 | n_test = 10000
24 |
25 | # Step 1: Read in data
26 | mnist_folder = 'data/mnist'
27 | utils.download_mnist(mnist_folder)
28 | train, val, test = utils.read_mnist(mnist_folder, flatten=True)
29 |
30 | # Step 2: Create datasets and iterator
31 | # create training Dataset and batch it
32 | train_data = tf.data.Dataset.from_tensor_slices(train)
33 | train_data = train_data.shuffle(10000) # if you want to shuffle your data
34 | train_data = train_data.batch(batch_size)
35 |
36 | # create testing Dataset and batch it
37 | test_data = None
38 | #############################
39 | ########## TO DO ############
40 | #############################
41 |
42 |
43 | # create one iterator and initialize it with different datasets
44 | iterator = tf.data.Iterator.from_structure(train_data.output_types,
45 | train_data.output_shapes)
46 | img, label = iterator.get_next()
47 |
48 | train_init = iterator.make_initializer(train_data) # initializer for train_data
49 | test_init = iterator.make_initializer(test_data) # initializer for test_data
50 |
51 | # Step 3: create weights and bias
52 | # w is initialized to random values with mean of 0, stddev of 0.01
53 | # b is initialized to 0
54 | # shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w)
55 | # shape of b depends on Y
56 | w, b = None, None
57 | #############################
58 | ########## TO DO ############
59 | #############################
60 |
61 |
62 | # Step 4: build model
63 | # the model that returns the logits.
64 | # these logits will later be passed through a softmax layer
65 | logits = None
66 | #############################
67 | ########## TO DO ############
68 | #############################
69 |
70 |
71 | # Step 5: define loss function
72 | # use cross entropy of softmax of logits as the loss function
73 | loss = None
74 | #############################
75 | ########## TO DO ############
76 | #############################
77 |
78 |
79 | # Step 6: define optimizer
80 | # using Adam optimizer with pre-defined learning rate to minimize loss
81 | optimizer = None
82 | #############################
83 | ########## TO DO ############
84 | #############################
85 |
86 |
87 | # Step 7: calculate accuracy with test set
88 | preds = tf.nn.softmax(logits)
89 | correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(label, 1))
90 | accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
91 |
92 | writer = tf.summary.FileWriter('./graphs/logreg', tf.get_default_graph())
93 | with tf.Session() as sess:
94 |
95 | start_time = time.time()
96 | sess.run(tf.global_variables_initializer())
97 |
98 | # train the model n_epochs times
99 | for i in range(n_epochs):
100 | sess.run(train_init) # drawing samples from train_data
101 | total_loss = 0
102 | n_batches = 0
103 | try:
104 | while True:
105 | _, l = sess.run([optimizer, loss])
106 | total_loss += l
107 | n_batches += 1
108 | except tf.errors.OutOfRangeError:
109 | pass
110 | print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))
111 | print('Total time: {0} seconds'.format(time.time() - start_time))
112 |
113 | # test the model
114 | sess.run(test_init) # drawing samples from test_data
115 | total_correct_preds = 0
116 | try:
117 | while True:
118 | accuracy_batch = sess.run(accuracy)
119 | total_correct_preds += accuracy_batch
120 | except tf.errors.OutOfRangeError:
121 | pass
122 |
123 | print('Accuracy {0}'.format(total_correct_preds/n_test))
124 | writer.close()
--------------------------------------------------------------------------------
/Tensorflow/standford_example/04_linreg_eager.py:
--------------------------------------------------------------------------------
1 | """ Starter code for a simple regression example using eager execution.
2 | Created by Akshay Agrawal (akshayka@cs.stanford.edu)
3 | CS20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | Lecture 04
6 | """
7 | import time
8 |
9 | import tensorflow as tf
10 | import tensorflow.contrib.eager as tfe
11 | import matplotlib.pyplot as plt
12 |
13 | import utils
14 |
15 | DATA_FILE = 'data/birth_life_2010.txt'
16 |
17 | # In order to use eager execution, `tfe.enable_eager_execution()` must be
18 | # called at the very beginning of a TensorFlow program.
19 | tfe.enable_eager_execution()
20 |
21 | # Read the data into a dataset.
22 | data, n_samples = utils.read_birth_life_data(DATA_FILE)
23 | dataset = tf.data.Dataset.from_tensor_slices((data[:,0], data[:,1]))
24 |
25 | # Create variables.
26 | w = tfe.Variable(0.0)
27 | b = tfe.Variable(0.0)
28 |
29 | # Define the linear predictor.
30 | def prediction(x):
31 | return x * w + b
32 |
33 | # Define loss functions of the form: L(y, y_predicted)
34 | def squared_loss(y, y_predicted):
35 | return (y - y_predicted) ** 2
36 |
37 | def huber_loss(y, y_predicted, m=1.0):
38 | """Huber loss."""
39 | t = y - y_predicted
40 | # Note that enabling eager execution lets you use Python control flow and
41 |     # specify dynamic TensorFlow computations. Contrast this implementation
42 | # to the graph-construction one found in `utils`, which uses `tf.cond`.
43 | return t ** 2 if tf.abs(t) <= m else m * (2 * tf.abs(t) - m)
44 |
45 | def train(loss_fn):
46 | """Train a regression model evaluated using `loss_fn`."""
47 | print('Training; loss function: ' + loss_fn.__name__)
48 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
49 |
50 | # Define the function through which to differentiate.
51 | def loss_for_example(x, y):
52 | return loss_fn(y, prediction(x))
53 |
54 | # `grad_fn(x_i, y_i)` returns (1) the value of `loss_for_example`
55 | # evaluated at `x_i`, `y_i` and (2) the gradients of any variables used in
56 | # calculating it.
57 | grad_fn = tfe.implicit_value_and_gradients(loss_for_example)
58 |
59 | start = time.time()
60 | for epoch in range(100):
61 | total_loss = 0.0
62 | for x_i, y_i in tfe.Iterator(dataset):
63 | loss, gradients = grad_fn(x_i, y_i)
64 | # Take an optimization step and update variables.
65 | optimizer.apply_gradients(gradients)
66 | total_loss += loss
67 | if epoch % 10 == 0:
68 | print('Epoch {0}: {1}'.format(epoch, total_loss / n_samples))
69 | print('Took: %f seconds' % (time.time() - start))
70 | print('Eager execution exhibits significant overhead per operation. '
71 | 'As you increase your batch size, the impact of the overhead will '
72 | 'become less noticeable. Eager execution is under active development: '
73 | 'expect performance to increase substantially in the near future!')
74 |
75 | train(huber_loss)
76 | plt.plot(data[:,0], data[:,1], 'bo')
77 | # The `.numpy()` method of a tensor retrieves the NumPy array backing it.
78 | # In future versions of eager, you won't need to call `.numpy()` and will
79 | # instead be able to, in most cases, pass Tensors wherever NumPy arrays are
80 | # expected.
81 | plt.plot(data[:,0], data[:,0] * w.numpy() + b.numpy(), 'r',
82 | label="huber regression")
83 | plt.legend()
84 | plt.show()
85 |
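
For contrast with the eager `huber_loss` above, here is a rough sketch of a graph-mode formulation of the same piecewise formula, using `tf.cond` as the comment mentions. It assumes per-example scalar labels, as in this file; the actual `utils.huber_loss` may differ in its constants:

```python
import tensorflow as tf

def huber_loss_graph(y, y_predicted, m=1.0):
    """Graph-mode version of the piecewise loss above: tf.cond replaces Python if/else."""
    t = y - y_predicted
    return tf.cond(tf.abs(t) <= m,
                   lambda: t ** 2,
                   lambda: m * (2 * tf.abs(t) - m))
```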
--------------------------------------------------------------------------------
/Tensorflow/standford_example/04_linreg_eager_starter.py:
--------------------------------------------------------------------------------
1 | """ Starter code for a simple regression example using eager execution.
2 | Created by Akshay Agrawal (akshayka@cs.stanford.edu)
3 | CS20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | Lecture 04
6 | """
7 | import time
8 |
9 | import tensorflow as tf
10 | import tensorflow.contrib.eager as tfe
11 | import matplotlib.pyplot as plt
12 |
13 | import utils
14 |
15 | DATA_FILE = 'data/birth_life_2010.txt'
16 |
17 | # In order to use eager execution, `tfe.enable_eager_execution()` must be
18 | # called at the very beginning of a TensorFlow program.
19 | #############################
20 | ########## TO DO ############
21 | #############################
22 |
23 | # Read the data into a dataset.
24 | data, n_samples = utils.read_birth_life_data(DATA_FILE)
25 | dataset = tf.data.Dataset.from_tensor_slices((data[:,0], data[:,1]))
26 |
27 | # Create weight and bias variables, initialized to 0.0.
28 | #############################
29 | ########## TO DO ############
30 | #############################
31 | w = None
32 | b = None
33 |
34 | # Define the linear predictor.
35 | def prediction(x):
36 | #############################
37 | ########## TO DO ############
38 | #############################
39 | pass
40 |
41 | # Define loss functions of the form: L(y, y_predicted)
42 | def squared_loss(y, y_predicted):
43 | #############################
44 | ########## TO DO ############
45 | #############################
46 | pass
47 |
48 | def huber_loss(y, y_predicted):
49 | """Huber loss with `m` set to `1.0`."""
50 | #############################
51 | ########## TO DO ############
52 | #############################
53 | pass
54 |
55 | def train(loss_fn):
56 | """Train a regression model evaluated using `loss_fn`."""
57 | print('Training; loss function: ' + loss_fn.__name__)
58 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
59 |
60 | # Define the function through which to differentiate.
61 | #############################
62 | ########## TO DO ############
63 | #############################
64 | def loss_for_example(x, y):
65 | pass
66 |
67 | # Obtain a gradients function using `tfe.implicit_value_and_gradients`.
68 | #############################
69 | ########## TO DO ############
70 | #############################
71 | grad_fn = None
72 |
73 | start = time.time()
74 | for epoch in range(100):
75 | total_loss = 0.0
76 | for x_i, y_i in tfe.Iterator(dataset):
77 | # Compute the loss and gradient, and take an optimization step.
78 | #############################
79 | ########## TO DO ############
80 | #############################
81 | optimizer.apply_gradients(gradients)
82 | total_loss += loss
83 | if epoch % 10 == 0:
84 | print('Epoch {0}: {1}'.format(epoch, total_loss / n_samples))
85 | print('Took: %f seconds' % (time.time() - start))
86 | print('Eager execution exhibits significant overhead per operation. '
87 | 'As you increase your batch size, the impact of the overhead will '
88 | 'become less noticeable. Eager execution is under active development: '
89 | 'expect performance to increase substantially in the near future!')
90 |
91 | train(huber_loss)
92 | plt.plot(data[:,0], data[:,1], 'bo')
93 | # The `.numpy()` method of a tensor retrieves the NumPy array backing it.
94 | # In future versions of eager, you won't need to call `.numpy()` and will
95 | # instead be able to, in most cases, pass Tensors wherever NumPy arrays are
96 | # expected.
97 | plt.plot(data[:,0], data[:,0] * w.numpy() + b.numpy(), 'r',
98 | label="huber regression")
99 | plt.legend()
100 | plt.show()
101 |
--------------------------------------------------------------------------------
/Tensorflow/standford_example/04_word2vec.py:
--------------------------------------------------------------------------------
1 | """ starter code for word2vec skip-gram model with NCE loss
2 | CS 20: "TensorFlow for Deep Learning Research"
3 | cs20.stanford.edu
4 | Chip Huyen (chiphuyen@cs.stanford.edu)
5 | Lecture 04
6 | """
7 |
8 | import os
9 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
10 |
11 | import numpy as np
12 | from tensorflow.contrib.tensorboard.plugins import projector
13 | import tensorflow as tf
14 |
15 | import utils
16 | import word2vec_utils
17 |
18 | # Model hyperparameters
19 | VOCAB_SIZE = 50000
20 | BATCH_SIZE = 128
21 | EMBED_SIZE = 128 # dimension of the word embedding vectors
22 | SKIP_WINDOW = 1 # the context window
23 | NUM_SAMPLED = 64 # number of negative examples to sample
24 | LEARNING_RATE = 1.0
25 | NUM_TRAIN_STEPS = 100000
26 | VISUAL_FLD = 'visualization'
27 | SKIP_STEP = 5000
28 |
29 | # Parameters for downloading data
30 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip'
31 | EXPECTED_BYTES = 31344016
32 | NUM_VISUALIZE = 3000 # number of tokens to visualize
33 |
34 |
35 | def word2vec(dataset):
36 | """ Build the graph for word2vec model and train it """
37 | # Step 1: get input, output from the dataset
38 | with tf.name_scope('data'):
39 | iterator = dataset.make_initializable_iterator()
40 | center_words, target_words = iterator.get_next()
41 |
42 | """ Step 2 + 3: define weights and embedding lookup.
43 | In word2vec, it's actually the weights that we care about
44 | """
45 | with tf.name_scope('embed'):
46 | embed_matrix = tf.get_variable('embed_matrix',
47 | shape=[VOCAB_SIZE, EMBED_SIZE],
48 | initializer=tf.random_uniform_initializer())
49 | embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embedding')
50 |
51 | # Step 4: construct variables for NCE loss and define loss function
52 | with tf.name_scope('loss'):
53 | nce_weight = tf.get_variable('nce_weight', shape=[VOCAB_SIZE, EMBED_SIZE],
54 | initializer=tf.truncated_normal_initializer(stddev=1.0 / (EMBED_SIZE ** 0.5)))
55 | nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE]))
56 |
57 | # define loss function to be NCE loss function
58 | loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight,
59 | biases=nce_bias,
60 | labels=target_words,
61 | inputs=embed,
62 | num_sampled=NUM_SAMPLED,
63 | num_classes=VOCAB_SIZE), name='loss')
64 |
65 | # Step 5: define optimizer
66 | with tf.name_scope('optimizer'):
67 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)
68 |
69 | utils.safe_mkdir('checkpoints')
70 |
71 | with tf.Session() as sess:
72 | sess.run(iterator.initializer)
73 | sess.run(tf.global_variables_initializer())
74 |
75 |         total_loss = 0.0 # we use this to calculate the average loss in the last SKIP_STEP steps
76 | writer = tf.summary.FileWriter('graphs/word2vec_simple', sess.graph)
77 |
78 | for index in range(NUM_TRAIN_STEPS):
79 | try:
80 | loss_batch, _ = sess.run([loss, optimizer])
81 | total_loss += loss_batch
82 | if (index + 1) % SKIP_STEP == 0:
83 | print('Average loss at step {}: {:5.1f}'.format(index, total_loss / SKIP_STEP))
84 | total_loss = 0.0
85 | except tf.errors.OutOfRangeError:
86 | sess.run(iterator.initializer)
87 | writer.close()
88 |
89 | def gen():
90 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES, VOCAB_SIZE,
91 | BATCH_SIZE, SKIP_WINDOW, VISUAL_FLD)
92 |
93 | def main():
94 | dataset = tf.data.Dataset.from_generator(gen,
95 | (tf.int32, tf.int32),
96 | (tf.TensorShape([BATCH_SIZE]), tf.TensorShape([BATCH_SIZE, 1])))
97 | word2vec(dataset)
98 |
99 | if __name__ == '__main__':
100 | main()
101 |
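
The `projector` import at the top of this file is not used in the trimmed training loop. Below is a sketch of how the learned embedding matrix could be wired into the TensorBoard projector; the function name, checkpoint path, and metadata file are assumptions based on the constants above, not code from the course repo:

```python
import os
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

def visualize_embeddings(sess, embed_matrix, visual_fld='visualization'):
    """Hypothetical helper: point the TensorBoard projector at the embedding matrix."""
    saver = tf.train.Saver([embed_matrix])
    writer = tf.summary.FileWriter(visual_fld)
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = embed_matrix.name
    embedding.metadata_path = 'vocab.tsv'  # written by word2vec_utils.build_vocab
    projector.visualize_embeddings(writer, config)
    saver.save(sess, os.path.join(visual_fld, 'model.ckpt'))
```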
--------------------------------------------------------------------------------
/Tensorflow/standford_example/04_word2vec_eager.py:
--------------------------------------------------------------------------------
1 | """ starter code for word2vec skip-gram model with NCE loss
2 | Eager execution
3 | CS 20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | Chip Huyen (chiphuyen@cs.stanford.edu) & Akshay Agrawal (akshayka@cs.stanford.edu)
6 | Lecture 04
7 | """
8 |
9 | import os
10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
11 |
12 | import numpy as np
13 | import tensorflow as tf
14 | import tensorflow.contrib.eager as tfe
15 |
16 | import utils
17 | import word2vec_utils
18 |
19 | tfe.enable_eager_execution()
20 |
21 | # Model hyperparameters
22 | VOCAB_SIZE = 50000
23 | BATCH_SIZE = 128
24 | EMBED_SIZE = 128 # dimension of the word embedding vectors
25 | SKIP_WINDOW = 1 # the context window
26 | NUM_SAMPLED = 64 # number of negative examples to sample
27 | LEARNING_RATE = 1.0
28 | NUM_TRAIN_STEPS = 100000
29 | VISUAL_FLD = 'visualization'
30 | SKIP_STEP = 5000
31 |
32 | # Parameters for downloading data
33 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip'
34 | EXPECTED_BYTES = 31344016
35 |
36 | class Word2Vec(object):
37 | def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED):
38 | self.vocab_size = vocab_size
39 | self.num_sampled = num_sampled
40 | self.embed_matrix = tfe.Variable(tf.random_uniform(
41 | [vocab_size, embed_size]))
42 | self.nce_weight = tfe.Variable(tf.truncated_normal(
43 | [vocab_size, embed_size],
44 | stddev=1.0 / (embed_size ** 0.5)))
45 | self.nce_bias = tfe.Variable(tf.zeros([vocab_size]))
46 |
47 | def compute_loss(self, center_words, target_words):
48 | """Computes the forward pass of word2vec with the NCE loss."""
49 | embed = tf.nn.embedding_lookup(self.embed_matrix, center_words)
50 | loss = tf.reduce_mean(tf.nn.nce_loss(weights=self.nce_weight,
51 | biases=self.nce_bias,
52 | labels=target_words,
53 | inputs=embed,
54 | num_sampled=self.num_sampled,
55 | num_classes=self.vocab_size))
56 | return loss
57 |
58 |
59 | def gen():
60 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES,
61 | VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW,
62 | VISUAL_FLD)
63 |
64 | def main():
65 | dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32),
66 | (tf.TensorShape([BATCH_SIZE]),
67 | tf.TensorShape([BATCH_SIZE, 1])))
68 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
69 | model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE)
70 | grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)
71 | total_loss = 0.0 # for average loss in the last SKIP_STEP steps
72 | num_train_steps = 0
73 | while num_train_steps < NUM_TRAIN_STEPS:
74 | for center_words, target_words in tfe.Iterator(dataset):
75 | if num_train_steps >= NUM_TRAIN_STEPS:
76 | break
77 | loss_batch, grads = grad_fn(center_words, target_words)
78 | total_loss += loss_batch
79 | optimizer.apply_gradients(grads)
80 | if (num_train_steps + 1) % SKIP_STEP == 0:
81 | print('Average loss at step {}: {:5.1f}'.format(
82 | num_train_steps, total_loss / SKIP_STEP))
83 | total_loss = 0.0
84 | num_train_steps += 1
85 |
86 |
87 | if __name__ == '__main__':
88 | main()
89 |
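
To clarify what `grad_fn` returns in the loop above, here is a toy sketch of `tfe.implicit_value_and_gradients` on a one-variable loss (eager execution assumed to be enabled, as at the top of this file):

```python
import tensorflow as tf
import tensorflow.contrib.eager as tfe

w = tfe.Variable(3.0)

def loss_fn(x):
    return (w * x - 1.0) ** 2

# grad_fn returns the loss value plus a list of (gradient, variable) pairs,
# which can be passed straight to optimizer.apply_gradients.
grad_fn = tfe.implicit_value_and_gradients(loss_fn)
loss, grads_and_vars = grad_fn(2.0)
tf.train.GradientDescentOptimizer(0.1).apply_gradients(grads_and_vars)
print(loss.numpy(), w.numpy())  # 25.0, then w = 3.0 - 0.1 * 20.0 = 1.0
```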
--------------------------------------------------------------------------------
/Tensorflow/standford_example/04_word2vec_eager_starter.py:
--------------------------------------------------------------------------------
1 | """ starter code for word2vec skip-gram model with NCE loss
2 | Eager execution
3 | CS 20: "TensorFlow for Deep Learning Research"
4 | cs20.stanford.edu
5 | Chip Huyen (chiphuyen@cs.stanford.edu) & Akshay Agrawal (akshayka@cs.stanford.edu)
6 | Lecture 04
7 | """
8 |
9 | import os
10 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
11 |
12 | import numpy as np
13 | import tensorflow as tf
14 | import tensorflow.contrib.eager as tfe
15 |
16 | import utils
17 | import word2vec_utils
18 |
19 | # Enable eager execution!
20 | #############################
21 | ########## TO DO ############
22 | #############################
23 |
24 | # Model hyperparameters
25 | VOCAB_SIZE = 50000
26 | BATCH_SIZE = 128
27 | EMBED_SIZE = 128 # dimension of the word embedding vectors
28 | SKIP_WINDOW = 1 # the context window
29 | NUM_SAMPLED = 64 # number of negative examples to sample
30 | LEARNING_RATE = 1.0
31 | NUM_TRAIN_STEPS = 100000
32 | VISUAL_FLD = 'visualization'
33 | SKIP_STEP = 5000
34 |
35 | # Parameters for downloading data
36 | DOWNLOAD_URL = 'http://mattmahoney.net/dc/text8.zip'
37 | EXPECTED_BYTES = 31344016
38 |
39 | class Word2Vec(object):
40 | def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED):
41 | self.vocab_size = vocab_size
42 | self.num_sampled = num_sampled
43 | # Create the variables: an embedding matrix, nce_weight, and nce_bias
44 | #############################
45 | ########## TO DO ############
46 | #############################
47 | self.embed_matrix = None
48 | self.nce_weight = None
49 | self.nce_bias = None
50 |
51 | def compute_loss(self, center_words, target_words):
52 | """Computes the forward pass of word2vec with the NCE loss."""
53 | # Look up the embeddings for the center words
54 | #############################
55 | ########## TO DO ############
56 | #############################
57 | embed = None
58 |
59 | # Compute the loss, using tf.reduce_mean and tf.nn.nce_loss
60 | #############################
61 | ########## TO DO ############
62 | #############################
63 | loss = None
64 | return loss
65 |
66 |
67 | def gen():
68 | yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES,
69 | VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW,
70 | VISUAL_FLD)
71 |
72 | def main():
73 | dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32),
74 | (tf.TensorShape([BATCH_SIZE]),
75 | tf.TensorShape([BATCH_SIZE, 1])))
76 | optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
77 | # Create the model
78 | #############################
79 | ########## TO DO ############
80 | #############################
81 | model = None
82 |
83 | # Create the gradients function, using `tfe.implicit_value_and_gradients`
84 | #############################
85 | ########## TO DO ############
86 | #############################
87 | grad_fn = None
88 |
89 | total_loss = 0.0 # for average loss in the last SKIP_STEP steps
90 | num_train_steps = 0
91 | while num_train_steps < NUM_TRAIN_STEPS:
92 | for center_words, target_words in tfe.Iterator(dataset):
93 | if num_train_steps >= NUM_TRAIN_STEPS:
94 | break
95 |
96 | # Compute the loss and gradients, and take an optimization step.
97 | #############################
98 | ########## TO DO ############
99 | #############################
100 |
101 | if (num_train_steps + 1) % SKIP_STEP == 0:
102 | print('Average loss at step {}: {:5.1f}'.format(
103 | num_train_steps, total_loss / SKIP_STEP))
104 | total_loss = 0.0
105 | num_train_steps += 1
106 |
107 |
108 | if __name__ == '__main__':
109 | main()
110 |
--------------------------------------------------------------------------------
/Tensorflow/standford_example/05_randomization.py:
--------------------------------------------------------------------------------
1 | """ Examples to demonstrate ops level randomization
2 | CS 20: "TensorFlow for Deep Learning Research"
3 | cs20.stanford.edu
4 | Chip Huyen (chiphuyen@cs.stanford.edu)
5 | Lecture 05
6 | """
7 | import os
8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
9 |
10 | import tensorflow as tf
11 |
12 | # Example 1: session keeps track of the random state
13 | c = tf.random_uniform([], -10, 10, seed=2)
14 |
15 | with tf.Session() as sess:
16 | print(sess.run(c)) # >> 3.574932
17 | print(sess.run(c)) # >> -5.9731865
18 |
19 | # Example 2: each new session will start the random state all over again.
20 | c = tf.random_uniform([], -10, 10, seed=2)
21 |
22 | with tf.Session() as sess:
23 | print(sess.run(c)) # >> 3.574932
24 |
25 | with tf.Session() as sess:
26 | print(sess.run(c)) # >> 3.574932
27 |
28 | # Example 3: with operation level random seed, each op keeps its own seed.
29 | c = tf.random_uniform([], -10, 10, seed=2)
30 | d = tf.random_uniform([], -10, 10, seed=2)
31 |
32 | with tf.Session() as sess:
33 | print(sess.run(c)) # >> 3.574932
34 | print(sess.run(d)) # >> 3.574932
35 |
36 | # Example 4: graph level random seed
37 | tf.set_random_seed(2)
38 | c = tf.random_uniform([], -10, 10)
39 | d = tf.random_uniform([], -10, 10)
40 |
41 | with tf.Session() as sess:
42 | print(sess.run(c)) # >> 9.123926
43 | print(sess.run(d)) # >> -4.5340395
44 |
--------------------------------------------------------------------------------
/Tensorflow/standford_example/05_variable_sharing.py:
--------------------------------------------------------------------------------
1 | """ Examples to demonstrate variable sharing
2 | CS 20: 'TensorFlow for Deep Learning Research'
3 | cs20.stanford.edu
4 | Chip Huyen (chiphuyen@cs.stanford.edu)
5 | Lecture 05
6 | """
7 | import os
8 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
9 |
10 | import tensorflow as tf
11 |
12 | x1 = tf.truncated_normal([200, 100], name='x1')
13 | x2 = tf.truncated_normal([200, 100], name='x2')
14 |
15 | def two_hidden_layers(x):
16 | assert x.shape.as_list() == [200, 100]
17 | w1 = tf.Variable(tf.random_normal([100, 50]), name='h1_weights')
18 | b1 = tf.Variable(tf.zeros([50]), name='h1_biases')
19 | h1 = tf.matmul(x, w1) + b1
20 | assert h1.shape.as_list() == [200, 50]
21 | w2 = tf.Variable(tf.random_normal([50, 10]), name='h2_weights')
22 |     b2 = tf.Variable(tf.zeros([10]), name='h2_biases')
23 | logits = tf.matmul(h1, w2) + b2
24 | return logits
25 |
26 | def two_hidden_layers_2(x):
27 | assert x.shape.as_list() == [200, 100]
28 | w1 = tf.get_variable('h1_weights', [100, 50], initializer=tf.random_normal_initializer())
29 | b1 = tf.get_variable('h1_biases', [50], initializer=tf.constant_initializer(0.0))
30 | h1 = tf.matmul(x, w1) + b1
31 | assert h1.shape.as_list() == [200, 50]
32 | w2 = tf.get_variable('h2_weights', [50, 10], initializer=tf.random_normal_initializer())
33 | b2 = tf.get_variable('h2_biases', [10], initializer=tf.constant_initializer(0.0))
34 | logits = tf.matmul(h1, w2) + b2
35 | return logits
36 |
37 | # logits1 = two_hidden_layers(x1)
38 | # logits2 = two_hidden_layers(x2)
39 |
40 | # logits1 = two_hidden_layers_2(x1)
41 | # logits2 = two_hidden_layers_2(x2)
42 |
43 | # with tf.variable_scope('two_layers') as scope:
44 | # logits1 = two_hidden_layers_2(x1)
45 | # scope.reuse_variables()
46 | # logits2 = two_hidden_layers_2(x2)
47 |
48 | # with tf.variable_scope('two_layers') as scope:
49 | # logits1 = two_hidden_layers_2(x1)
50 | # scope.reuse_variables()
51 | # logits2 = two_hidden_layers_2(x2)
52 |
53 | def fully_connected(x, output_dim, scope):
54 | with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope:
55 | w = tf.get_variable('weights', [x.shape[1], output_dim], initializer=tf.random_normal_initializer())
56 | b = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
57 | return tf.matmul(x, w) + b
58 |
59 | def two_hidden_layers(x):
60 | h1 = fully_connected(x, 50, 'h1')
61 |     h2 = fully_connected(h1, 10, 'h2')
62 |     return h2
63 | with tf.variable_scope('two_layers') as scope:
64 | logits1 = two_hidden_layers(x1)
65 | # scope.reuse_variables()
66 | logits2 = two_hidden_layers(x2)
67 |
68 | writer = tf.summary.FileWriter('./graphs/cool_variables', tf.get_default_graph())
69 | writer.close()
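
Because `fully_connected` opens its scope with `reuse=tf.AUTO_REUSE`, the second call to `two_hidden_layers` reuses the first call's weights instead of creating new ones. A quick sanity-check sketch to run after the graph above is built; the expected names are an assumption based on the scopes used here:

```python
import tensorflow as tf

# Only one set of h1/h2 variables should exist, shared by logits1 and logits2.
for v in tf.global_variables():
    print(v.name)
# expected:
#   two_layers/h1/weights:0
#   two_layers/h1/biases:0
#   two_layers/h2/weights:0
#   two_layers/h2/biases:0
```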
--------------------------------------------------------------------------------
/Tensorflow/standford_example/07_run_kernels.py:
--------------------------------------------------------------------------------
1 | """
2 | Simple examples of convolution to do some basic filters
3 | Also demonstrates the use of TensorFlow data readers.
4 |
5 | We will use some popular filters for our image.
6 | It seems to be working with grayscale images, but not with rgb images.
7 | It's probably because I didn't choose the right kernels for rgb images.
8 |
9 | kernels for rgb images have dimensions 3 x 3 x 3 x 3
10 | kernels for grayscale images have dimensions 3 x 3 x 1 x 1
11 |
12 | CS 20: "TensorFlow for Deep Learning Research"
13 | cs20.stanford.edu
14 | Chip Huyen (chiphuyen@cs.stanford.edu)
15 | Lecture 07
16 | """
17 | import os
18 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
19 |
20 | import sys
21 | sys.path.append('..')
22 |
23 | from matplotlib import gridspec as gridspec
24 | from matplotlib import pyplot as plt
25 | import tensorflow as tf
26 |
27 | import kernels
28 |
29 | def read_one_image(filename):
30 |     ''' This method shows how to read an image from a file into a tensor.
31 | The output is a tensor object.
32 | '''
33 | image_string = tf.read_file(filename)
34 | image_decoded = tf.image.decode_image(image_string)
35 | image = tf.cast(image_decoded, tf.float32) / 256.0
36 | return image
37 |
38 | def convolve(image, kernels, rgb=True, strides=[1, 3, 3, 1], padding='SAME'):
39 | images = [image[0]]
40 | for i, kernel in enumerate(kernels):
41 | filtered_image = tf.nn.conv2d(image,
42 | kernel,
43 | strides=strides,
44 | padding=padding)[0]
45 | if i == 2:
46 | filtered_image = tf.minimum(tf.nn.relu(filtered_image), 255)
47 | images.append(filtered_image)
48 | return images
49 |
50 | def show_images(images, rgb=True):
51 | gs = gridspec.GridSpec(1, len(images))
52 | for i, image in enumerate(images):
53 | plt.subplot(gs[0, i])
54 | if rgb:
55 | plt.imshow(image)
56 | else:
57 | image = image.reshape(image.shape[0], image.shape[1])
58 | plt.imshow(image, cmap='gray')
59 | plt.axis('off')
60 | plt.show()
61 |
62 | def main():
63 | rgb = False
64 | if rgb:
65 | kernels_list = [kernels.BLUR_FILTER_RGB,
66 | kernels.SHARPEN_FILTER_RGB,
67 | kernels.EDGE_FILTER_RGB,
68 | kernels.TOP_SOBEL_RGB,
69 | kernels.EMBOSS_FILTER_RGB]
70 | else:
71 | kernels_list = [kernels.BLUR_FILTER,
72 | kernels.SHARPEN_FILTER,
73 | kernels.EDGE_FILTER,
74 | kernels.TOP_SOBEL,
75 | kernels.EMBOSS_FILTER]
76 |
77 | kernels_list = kernels_list[1:]
78 | image = read_one_image('data/friday.jpg')
79 | if not rgb:
80 | image = tf.image.rgb_to_grayscale(image)
81 | image = tf.expand_dims(image, 0) # make it into a batch of 1 element
82 | images = convolve(image, kernels_list, rgb)
83 | with tf.Session() as sess:
84 | images = sess.run(images) # convert images from tensors to float values
85 | show_images(images, rgb)
86 |
87 | if __name__ == '__main__':
88 | main()
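
As the docstring notes, the kernel rank has to match the image's channel count: NHWC inputs pair with HWIO kernels. A small shape-check sketch (sizes are arbitrary):

```python
import numpy as np
import tensorflow as tf

# A grayscale batch [1, H, W, 1] pairs with a [3, 3, 1, 1] kernel.
gray_batch = tf.zeros([1, 32, 32, 1])
gray_kernel = tf.constant(np.zeros([3, 3, 1, 1]), dtype=tf.float32)
out = tf.nn.conv2d(gray_batch, gray_kernel, strides=[1, 3, 3, 1], padding='SAME')
print(out.shape)  # (1, 11, 11, 1): ceil(32 / 3) = 11 with SAME padding

# An RGB batch [1, 32, 32, 3] would instead need a [3, 3, 3, 3] kernel.
```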
--------------------------------------------------------------------------------
/Tensorflow/standford_example/kernels.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | a = np.zeros([3, 3, 3, 3])
5 | a[1, 1, :, :] = 0.25
6 | a[0, 1, :, :] = 0.125
7 | a[1, 0, :, :] = 0.125
8 | a[2, 1, :, :] = 0.125
9 | a[1, 2, :, :] = 0.125
10 | a[0, 0, :, :] = 0.0625
11 | a[0, 2, :, :] = 0.0625
12 | a[2, 0, :, :] = 0.0625
13 | a[2, 2, :, :] = 0.0625
14 |
15 | BLUR_FILTER_RGB = tf.constant(a, dtype=tf.float32)
16 |
17 | a = np.zeros([3, 3, 1, 1])
18 | # a[1, 1, :, :] = 0.25
19 | # a[0, 1, :, :] = 0.125
20 | # a[1, 0, :, :] = 0.125
21 | # a[2, 1, :, :] = 0.125
22 | # a[1, 2, :, :] = 0.125
23 | # a[0, 0, :, :] = 0.0625
24 | # a[0, 2, :, :] = 0.0625
25 | # a[2, 0, :, :] = 0.0625
26 | # a[2, 2, :, :] = 0.0625
27 | a[1, 1, :, :] = 1.0
28 | a[0, 1, :, :] = 1.0
29 | a[1, 0, :, :] = 1.0
30 | a[2, 1, :, :] = 1.0
31 | a[1, 2, :, :] = 1.0
32 | a[0, 0, :, :] = 1.0
33 | a[0, 2, :, :] = 1.0
34 | a[2, 0, :, :] = 1.0
35 | a[2, 2, :, :] = 1.0
36 | BLUR_FILTER = tf.constant(a, dtype=tf.float32)
37 |
38 | a = np.zeros([3, 3, 3, 3])
39 | a[1, 1, :, :] = 5
40 | a[0, 1, :, :] = -1
41 | a[1, 0, :, :] = -1
42 | a[2, 1, :, :] = -1
43 | a[1, 2, :, :] = -1
44 |
45 | SHARPEN_FILTER_RGB = tf.constant(a, dtype=tf.float32)
46 |
47 | a = np.zeros([3, 3, 1, 1])
48 | a[1, 1, :, :] = 5
49 | a[0, 1, :, :] = -1
50 | a[1, 0, :, :] = -1
51 | a[2, 1, :, :] = -1
52 | a[1, 2, :, :] = -1
53 |
54 | SHARPEN_FILTER = tf.constant(a, dtype=tf.float32)
55 |
56 | # a = np.zeros([3, 3, 3, 3])
57 | # a[:, :, :, :] = -1
58 | # a[1, 1, :, :] = 8
59 |
60 | # EDGE_FILTER_RGB = tf.constant(a, dtype=tf.float32)
61 |
62 | EDGE_FILTER_RGB = tf.constant([
63 | [[[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
64 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
65 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]],
66 | [[[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
67 | [[ 8., 0., 0.], [ 0., 8., 0.], [ 0., 0., 8.]],
68 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]],
69 | [[[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
70 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
71 | [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]]
72 | ])
73 |
74 | a = np.zeros([3, 3, 1, 1])
75 | # a[:, :, :, :] = -1
76 | # a[1, 1, :, :] = 8
77 | a[0, 1, :, :] = -1
78 | a[1, 0, :, :] = -1
79 | a[1, 2, :, :] = -1
80 | a[2, 1, :, :] = -1
81 | a[1, 1, :, :] = 4
82 |
83 | EDGE_FILTER = tf.constant(a, dtype=tf.float32)
84 |
85 | a = np.zeros([3, 3, 3, 3])
86 | a[0, :, :, :] = 1
87 | a[0, 1, :, :] = 2 # originally 2
88 | a[2, :, :, :] = -1
89 | a[2, 1, :, :] = -2
90 |
91 | TOP_SOBEL_RGB = tf.constant(a, dtype=tf.float32)
92 |
93 | a = np.zeros([3, 3, 1, 1])
94 | a[0, :, :, :] = 1
95 | a[0, 1, :, :] = 2 # originally 2
96 | a[2, :, :, :] = -1
97 | a[2, 1, :, :] = -2
98 |
99 | TOP_SOBEL = tf.constant(a, dtype=tf.float32)
100 |
101 | a = np.zeros([3, 3, 3, 3])
102 | a[0, 0, :, :] = -2
103 | a[0, 1, :, :] = -1
104 | a[1, 0, :, :] = -1
105 | a[1, 1, :, :] = 1
106 | a[1, 2, :, :] = 1
107 | a[2, 1, :, :] = 1
108 | a[2, 2, :, :] = 2
109 |
110 | EMBOSS_FILTER_RGB = tf.constant(a, dtype=tf.float32)
111 |
112 | a = np.zeros([3, 3, 1, 1])
113 | a[0, 0, :, :] = -2
114 | a[0, 1, :, :] = -1
115 | a[1, 0, :, :] = -1
116 | a[1, 1, :, :] = 1
117 | a[1, 2, :, :] = 1
118 | a[2, 1, :, :] = 1
119 | a[2, 2, :, :] = 2
120 | EMBOSS_FILTER = tf.constant(a, dtype=tf.float32)
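
One thing to be aware of: the grayscale `BLUR_FILTER` above is an all-ones 3x3 box filter, so it brightens the image by roughly a factor of nine. A normalized variant is a one-line change (a sketch, not part of the original module):

```python
import numpy as np
import tensorflow as tf

# Divide by the number of taps so the output stays in the input's brightness range.
box = np.ones([3, 3, 1, 1], dtype=np.float32) / 9.0
BLUR_FILTER_NORMALIZED = tf.constant(box)
```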
--------------------------------------------------------------------------------
/Tensorflow/standford_example/word2vec_utils.py:
--------------------------------------------------------------------------------
1 | from collections import Counter
2 | import random
3 | import os
4 | import sys
5 | sys.path.append('..')
6 | import zipfile
7 |
8 | import numpy as np
9 | from six.moves import urllib
10 | import tensorflow as tf
11 |
12 | import utils
13 |
14 | def read_data(file_path):
15 | """ Read data into a list of tokens
16 | There should be 17,005,207 tokens
17 | """
18 | with zipfile.ZipFile(file_path) as f:
19 | words = tf.compat.as_str(f.read(f.namelist()[0])).split()
20 | return words
21 |
22 | def build_vocab(words, vocab_size, visual_fld):
23 | """ Build vocabulary of VOCAB_SIZE most frequent words and write it to
24 | visualization/vocab.tsv
25 | """
26 | utils.safe_mkdir(visual_fld)
27 | file = open(os.path.join(visual_fld, 'vocab.tsv'), 'w')
28 |
29 | dictionary = dict()
30 | count = [('UNK', -1)]
31 | index = 0
32 | count.extend(Counter(words).most_common(vocab_size - 1))
33 |
34 | for word, _ in count:
35 | dictionary[word] = index
36 | index += 1
37 | file.write(word + '\n')
38 |
39 | index_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
40 | file.close()
41 | return dictionary, index_dictionary
42 |
43 | def convert_words_to_index(words, dictionary):
44 | """ Replace each word in the dataset with its index in the dictionary """
45 | return [dictionary[word] if word in dictionary else 0 for word in words]
46 |
47 | def generate_sample(index_words, context_window_size):
48 | """ Form training pairs according to the skip-gram model. """
49 | for index, center in enumerate(index_words):
50 | context = random.randint(1, context_window_size)
51 | # get a random target before the center word
52 | for target in index_words[max(0, index - context): index]:
53 | yield center, target
54 |         # get a random target after the center word
55 | for target in index_words[index + 1: index + context + 1]:
56 | yield center, target
57 |
58 | def most_common_words(visual_fld, num_visualize):
59 | """ create a list of num_visualize most frequent words to visualize on TensorBoard.
60 | saved to visualization/vocab_[num_visualize].tsv
61 | """
62 | words = open(os.path.join(visual_fld, 'vocab.tsv'), 'r').readlines()[:num_visualize]
63 | words = [word for word in words]
64 | file = open(os.path.join(visual_fld, 'vocab_' + str(num_visualize) + '.tsv'), 'w')
65 | for word in words:
66 | file.write(word)
67 | file.close()
68 |
69 | def batch_gen(download_url, expected_byte, vocab_size, batch_size,
70 | skip_window, visual_fld):
71 | local_dest = 'data/text8.zip'
72 | utils.download_one_file(download_url, local_dest, expected_byte)
73 | words = read_data(local_dest)
74 | dictionary, _ = build_vocab(words, vocab_size, visual_fld)
75 | index_words = convert_words_to_index(words, dictionary)
76 | del words # to save memory
77 | single_gen = generate_sample(index_words, skip_window)
78 |
79 | while True:
80 | center_batch = np.zeros(batch_size, dtype=np.int32)
81 | target_batch = np.zeros([batch_size, 1])
82 | for index in range(batch_size):
83 | center_batch[index], target_batch[index] = next(single_gen)
84 | yield center_batch, target_batch
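
A minimal usage sketch for generate_sample (not part of the file); the toy index list is made up for illustration, and the emitted pairs vary from run to run because the window size per center word is random:

    toy_indices = [5, 2, 7, 9]   # a tiny, already-indexed "corpus"
    for center, target in generate_sample(toy_indices, context_window_size=2):
        print(center, target)    # skip-gram (center, context) pairs, e.g. 5 2 / 2 5 / 2 7 / ...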
--------------------------------------------------------------------------------
/Text_Classification/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Text_Classification/.DS_Store
--------------------------------------------------------------------------------
/Text_Classification/.ipynb_checkpoints/cnn_textclassification_keras-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 6.3 \n",
8 | "\n",
9 | "https://github.com/jarfo/kchar\n",
10 | "https://github.com/carpedm20/lstm-char-cnn-tensorflow\n",
11 | "https://github.com/fchollet/keras/blob/master/examples/imdb_cnn.py"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {
18 | "collapsed": false
19 | },
20 | "outputs": [
21 | {
22 | "name": "stderr",
23 | "output_type": "stream",
24 | "text": [
25 | "Using TensorFlow backend.\n"
26 | ]
27 | }
28 | ],
29 | "source": [
30 | "from keras.preprocessing import sequence\n",
31 | "from keras.models import Sequential\n",
32 | "from keras.layers import Dense, Dropout, Activation\n",
33 | "from keras.layers import Embedding\n",
34 | "from keras.layers import Conv1D, GlobalMaxPooling1D\n",
35 | "from keras.datasets import imdb"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 2,
41 | "metadata": {
42 | "collapsed": true
43 | },
44 | "outputs": [],
45 | "source": [
46 | "# set parameters:\n",
47 | "max_features = 5000\n",
48 | "maxlen = 400\n",
49 | "batch_size = 32\n",
50 | "embedding_dims = 50\n",
51 | "filters = 250\n",
52 | "kernel_size = 3\n",
53 | "hidden_dims = 250\n",
54 | "epochs = 2"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 3,
60 | "metadata": {
61 | "collapsed": false
62 | },
63 | "outputs": [
64 | {
65 | "name": "stdout",
66 | "output_type": "stream",
67 | "text": [
68 | "Loading data...\n",
69 | "25000 train sequences\n",
70 | "25000 test sequences\n"
71 | ]
72 | }
73 | ],
74 | "source": [
75 | "print('Loading data...')\n",
76 | "(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)\n",
77 | "print(len(x_train), 'train sequences')\n",
78 | "print(len(x_test), 'test sequences')"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 4,
84 | "metadata": {
85 | "collapsed": false
86 | },
87 | "outputs": [
88 | {
89 | "name": "stdout",
90 | "output_type": "stream",
91 | "text": [
92 | "Pad sequences (samples x time)\n",
93 | "x_train shape: (25000, 400)\n",
94 | "x_test shape: (25000, 400)\n",
95 | "Build model...\n"
96 | ]
97 | }
98 | ],
99 | "source": [
100 | "print('Pad sequences (samples x time)')\n",
101 | "x_train = sequence.pad_sequences(x_train, maxlen=maxlen)\n",
102 | "x_test = sequence.pad_sequences(x_test, maxlen=maxlen)\n",
103 | "print('x_train shape:', x_train.shape)\n",
104 | "print('x_test shape:', x_test.shape)\n",
105 | "print('Build model...')\n"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 8,
111 | "metadata": {
112 | "collapsed": false
113 | },
114 | "outputs": [],
115 | "source": [
116 | "model = Sequential()\n",
117 | "\n",
118 | "# we start off with an efficient embedding layer which maps\n",
119 | "# our vocab indices into embedding_dims dimensions\n",
120 | "model.add(Embedding(max_features,\n",
121 | " embedding_dims,\n",
122 | " input_length=maxlen))\n",
123 | "model.add(Dropout(0.2))"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {
130 | "collapsed": true
131 | },
132 | "outputs": [],
133 | "source": [
134 | "# we add a Convolution1D, which will learn filters\n",
135 | "# word group filters of size filter_length:\n",
136 | "model.add(Conv1D(filters,\n",
137 | " kernel_size,\n",
138 | " padding='valid',\n",
139 | " activation='relu',\n",
140 | " strides=1))\n",
141 | "# we use max pooling:\n",
142 | "model.add(GlobalMaxPooling1D())\n",
143 | "\n",
144 | "# We add a vanilla hidden layer:\n",
145 | "model.add(Dense(hidden_dims))\n",
146 | "model.add(Dropout(0.2))\n",
147 | "model.add(Activation('relu'))\n",
148 | "\n",
149 | "# We project onto a single unit output layer, and squash it with a sigmoid:\n",
150 | "model.add(Dense(1))\n",
151 | "model.add(Activation('sigmoid'))\n",
152 | "\n",
153 | "model.compile(loss='binary_crossentropy',\n",
154 | " optimizer='adam',\n",
155 | " metrics=['accuracy'])\n",
156 | "model.fit(x_train, y_train,\n",
157 | " batch_size=batch_size,\n",
158 | " epochs=epochs,\n",
159 | " validation_data=(x_test, y_test))"
160 | ]
161 | }
162 | ],
163 | "metadata": {
164 | "anaconda-cloud": {},
165 | "kernelspec": {
166 | "display_name": "Python [conda root]",
167 | "language": "python",
168 | "name": "conda-root-py"
169 | },
170 | "language_info": {
171 | "codemirror_mode": {
172 | "name": "ipython",
173 | "version": 3
174 | },
175 | "file_extension": ".py",
176 | "mimetype": "text/x-python",
177 | "name": "python",
178 | "nbconvert_exporter": "python",
179 | "pygments_lexer": "ipython3",
180 | "version": "3.5.2"
181 | }
182 | },
183 | "nbformat": 4,
184 | "nbformat_minor": 1
185 | }
186 |
--------------------------------------------------------------------------------
/Text_Classification/.ipynb_checkpoints/seq2seq_keras-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/Text_Classification/To_Do/cnn_keras_text_class_kor.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sat May 6 15:08:54 2017
5 |
6 | #Data
7 | Korean movie review data: https://github.com/e9t/nsmc
8 |
9 | @author: ryan
10 |
11 | Work in progress
12 | """
--------------------------------------------------------------------------------
/Text_Classification/To_Do/hierachical_attention_keras_text_class_eng.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sat May 6 15:08:54 2017
5 |
6 | #Data
7 |
8 | @author: ryan
9 |
10 | Work in progress
11 | """
12 |
--------------------------------------------------------------------------------
/Text_Classification/To_Do/lstm_keras_text_class_eng.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sat May 6 15:08:54 2017
5 |
6 | #Data
7 |
8 | @author: ryan
9 |
10 | Work in progress
11 | """
12 |
--------------------------------------------------------------------------------
/Text_Classification/To_Do/rnn_attention_keras_text_class_eng .py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sat May 6 15:08:54 2017
5 |
6 | #Data
7 |
8 | @author: ryan
9 |
10 | Work in progress
11 | """
12 |
--------------------------------------------------------------------------------
/Text_Classification/cnn_keras_text_class_imdb.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sat May 6 22:55:55 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | import numpy as np
10 | import pandas as pd
11 | import pickle
12 | from collections import defaultdict
13 | import re
14 |
15 | from bs4 import BeautifulSoup
16 |
17 | import sys
18 | import os
19 |
20 | from keras.preprocessing import sequence
21 | from keras.models import Sequential
22 | from keras.layers import Dense, Dropout, Activation
23 | from keras.layers import Embedding
24 | from keras.layers import Conv1D, GlobalMaxPooling1D
25 | from keras.datasets import imdb
26 |
27 |
28 | # set parameters:
29 | max_features = 5000
30 | maxlen = 400
31 | batch_size = 32
32 | embedding_dims = 50
33 | filters = 250
34 | kernel_size = 3
35 | hidden_dims = 250
36 | epochs = 2
37 |
38 | print('Loading data...')
39 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
40 | print(len(x_train), 'train sequences')
41 | print(len(x_test), 'test sequences')
42 |
43 | print('Pad sequences (samples x time)')
44 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
45 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
46 | print('x_train shape:', x_train.shape)
47 | print('x_test shape:', x_test.shape)
48 |
49 | print('Build model...')
50 | model = Sequential()
51 |
52 | # we start off with an efficient embedding layer which maps
53 | # our vocab indices into embedding_dims dimensions
54 | model.add(Embedding(max_features,
55 | embedding_dims,
56 | input_length=maxlen))
57 | model.add(Dropout(0.2))
58 |
59 | # we add a Convolution1D, which will learn filters
60 | # word group filters of size filter_length:
61 | model.add(Conv1D(filters,
62 | kernel_size,
63 | padding='valid',
64 | activation='relu',
65 | strides=1))
66 | # we use max pooling:
67 | model.add(GlobalMaxPooling1D())
68 |
69 | # We add a vanilla hidden layer:
70 | model.add(Dense(hidden_dims))
71 | model.add(Dropout(0.2))
72 | model.add(Activation('relu'))
73 |
74 | # We project onto a single unit output layer, and squash it with a sigmoid:
75 | model.add(Dense(1))
76 | model.add(Activation('sigmoid'))
77 |
78 | model.compile(loss='binary_crossentropy',
79 | optimizer='adam',
80 | metrics=['accuracy'])
81 | history = model.fit(x_train, y_train,
82 | batch_size=batch_size,
83 | epochs=epochs,
84 | validation_data=(x_test, y_test))
85 |
86 | # list all data in history
87 | print(history.history.keys())
88 |
89 | # summarize history for accuracy
90 | import matplotlib.pyplot as plt
91 |
92 | plt.plot(history.history['acc'])
93 | plt.plot(history.history['val_acc'])
94 | plt.title('model accuracy')
95 | plt.ylabel('accuracy')
96 | plt.xlabel('epoch')
97 | plt.legend(['train', 'test'], loc='upper left')
98 | plt.show()
99 | # summarize history for loss
100 | plt.plot(history.history['loss'])
101 | plt.plot(history.history['val_loss'])
102 | plt.title('model loss')
103 | plt.ylabel('loss')
104 | plt.xlabel('epoch')
105 | plt.legend(['train', 'test'], loc='upper left')
106 | plt.show()
--------------------------------------------------------------------------------
/Text_Classification/cnn_keras_text_class_imdb2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sat Jun 3 18:51:43 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | '''This example demonstrates the use of Convolution1D for text classification.
10 | Gets to 0.89 test accuracy after 2 epochs.
11 | 90s/epoch on Intel i5 2.4Ghz CPU.
12 | 10s/epoch on Tesla K40 GPU.
13 | '''
14 |
15 | from __future__ import print_function
16 |
17 | from keras.preprocessing import sequence
18 | from keras.models import Sequential
19 | from keras.layers import Dense, Dropout, Activation
20 | from keras.layers import Embedding
21 | from keras.layers import Conv1D, GlobalMaxPooling1D
22 | from keras.datasets import imdb
23 |
24 | # set parameters:
25 | max_features = 5000
26 | maxlen = 400
27 | batch_size = 32
28 | embedding_dims = 50
29 | filters = 250
30 | kernel_size = 3
31 | hidden_dims = 250
32 | epochs = 2
33 |
34 | print('Loading data...')
35 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
36 | print(len(x_train), 'train sequences')
37 | print(len(x_test), 'test sequences')
38 |
39 | print('Pad sequences (samples x time)')
40 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
41 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
42 | print('x_train shape:', x_train.shape)
43 | print('x_test shape:', x_test.shape)
44 |
45 | print('Build model...')
46 | model = Sequential()
47 |
48 | # we start off with an efficient embedding layer which maps
49 | # our vocab indices into embedding_dims dimensions
50 | model.add(Embedding(max_features,
51 | embedding_dims,
52 | input_length=maxlen))
53 | model.add(Dropout(0.2))
54 |
55 | # we add a Convolution1D, which will learn filters
56 | # word group filters of size filter_length:
57 | model.add(Conv1D(filters,
58 | kernel_size,
59 | padding='valid',
60 | activation='relu',
61 | strides=1))
62 | # we use max pooling:
63 | model.add(GlobalMaxPooling1D())
64 |
65 | # We add a vanilla hidden layer:
66 | model.add(Dense(hidden_dims))
67 | model.add(Dropout(0.2))
68 | model.add(Activation('relu'))
69 |
70 | # We project onto a single unit output layer, and squash it with a sigmoid:
71 | model.add(Dense(1))
72 | model.add(Activation('sigmoid'))
73 |
74 | model.compile(loss='binary_crossentropy',
75 | optimizer='adam',
76 | metrics=['accuracy'])
77 | model.fit(x_train, y_train,
78 | batch_size=batch_size,
79 | epochs=epochs,
80 | validation_data=(x_test, y_test))
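
A minimal follow-up sketch (not in the script) that reports test performance after training; model.evaluate and model.predict are standard Keras Sequential methods:

    score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
    print('Test loss: %.4f, test accuracy: %.4f' % (score, acc))

    # predicted probability that the first test review is positive
    print(model.predict(x_test[:1])[0, 0])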
--------------------------------------------------------------------------------
/Text_Classification/nets/__pycache__/text_cnn.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/Text_Classification/nets/__pycache__/text_cnn.cpython-35.pyc
--------------------------------------------------------------------------------
/Text_Classification/nets/text_cnn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from collections import OrderedDict
4 |
5 | def textcnn(input_placeholder, target_placeholder, vocab_size, embedding_dim, filter_sizes, num_filters, is_training=True, keep_prob=0.8, scope='TextCNN'):
6 |
7 | # Get 'sequence_length' and 'num_classes'
8 | sequence_length = input_placeholder.get_shape()[1]
9 | num_classes = target_placeholder.get_shape()[1]
10 |
11 | # Declare 'end_points' which is an ordered dictionary
12 | end_points = OrderedDict()
13 |
14 |     # Shorthand for tf.random_uniform_initializer
15 |     random_uniform = lambda minval, maxval: tf.random_uniform_initializer(minval=minval, maxval=maxval)
16 | 
17 |     # Shorthand for tf.truncated_normal_initializer
18 |     trunc_normal = lambda stddev: tf.truncated_normal_initializer(mean=0.0, stddev=stddev)
19 | 
20 |     # Shorthand for tf.contrib.layers.xavier_initializer
21 |     xavier = tf.contrib.layers.xavier_initializer()
22 | 
23 |     # Shorthand for tf.contrib.layers.xavier_initializer_conv2d
24 |     xavier_conv = tf.contrib.layers.xavier_initializer_conv2d()
25 | 
26 |     # Shorthand for tf.constant_initializer
27 |     constant = lambda value: tf.constant_initializer(value=value)
28 |
29 | with tf.variable_scope(scope):
30 |
31 | end_point = 'Embedding'
32 | with tf.variable_scope(end_point):
33 | w_embedding = tf.get_variable(name='w_embedding', shape=[vocab_size, embedding_dim],
34 | initializer=random_uniform(-1.0, 1.0))
35 | embedded_chars = tf.nn.embedding_lookup(params=w_embedding, ids=input_placeholder, name='embedded_chars')
36 | embedded_chars_expanded = tf.expand_dims(input=embedded_chars, axis=-1, name='embedded_chars_expanded')
37 | end_points[end_point] = w_embedding
38 |
39 | pooled_output = []
40 | for i, filter_size in enumerate(filter_sizes):
41 | end_point = 'Conv-maxpool-%d' % filter_size
42 | with tf.variable_scope(end_point):
43 | filter_shape = [filter_size, embedding_dim, 1, num_filters]
44 | bias_shape = [num_filters]
45 | w_conv = tf.get_variable(name='w_conv', shape=filter_shape, initializer=trunc_normal(0.01))
46 | b_conv = tf.get_variable(name='b_conv', shape=bias_shape, initializer=constant(0.0))
47 | conv = tf.nn.conv2d(input=embedded_chars_expanded, filter=w_conv, strides=[1, 1, 1, 1], padding='VALID', name='conv')
48 | activated = tf.nn.relu(features=tf.nn.bias_add(conv, b_conv), name='relu')
49 | pooled = tf.nn.max_pool(value=activated, ksize=[1, sequence_length - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding='VALID', name='maxpool')
50 | pooled_output.append(pooled)
51 | end_points[end_point] = pooled
52 |
53 | end_point = 'Flatten'
54 | with tf.variable_scope(end_point):
55 | num_filters_total = num_filters * len(filter_sizes)
56 | h_pool = tf.concat(values=pooled_output, axis=3, name='concat')
57 | h_pool_flat = tf.reshape(tensor=h_pool, shape=[-1, num_filters_total], name='flatten')
58 | end_points[end_point] = h_pool_flat
59 |
60 | end_point = 'Fully-connected'
61 | with tf.variable_scope(end_point):
62 | dropout = tf.contrib.slim.dropout(h_pool_flat, keep_prob=keep_prob, is_training=is_training, scope='dropout')
63 | w_fc = tf.get_variable(name='w_fc', shape=[num_filters_total, num_classes], initializer=xavier)
64 | b_fc = tf.get_variable(name='b_fc', shape=[num_classes], initializer=constant(0.0))
65 | logits = tf.nn.xw_plus_b(x=dropout, weights=w_fc, biases=b_fc, name='logits')
66 | end_points[end_point] = logits
67 |
68 | return logits, end_points
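
A minimal sketch of wiring textcnn into a graph (not part of the file). It assumes it is run from the Text_Classification directory so that nets/ is importable; the sequence length (56), number of classes (2), vocabulary size and hyper-parameters are illustrative only:

    import tensorflow as tf
    from nets.text_cnn import textcnn

    input_ph = tf.placeholder(tf.int32, shape=[None, 56], name='input')
    target_ph = tf.placeholder(tf.float32, shape=[None, 2], name='target')

    logits, end_points = textcnn(input_ph, target_ph,
                                 vocab_size=10000, embedding_dim=128,
                                 filter_sizes=[3, 4, 5], num_filters=100)

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=target_ph, logits=logits))
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)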
--------------------------------------------------------------------------------
/Text_Classification/seq2seq_keras.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": []
11 | }
12 | ],
13 | "metadata": {
14 | "anaconda-cloud": {},
15 | "kernelspec": {
16 | "display_name": "Python [conda root]",
17 | "language": "python",
18 | "name": "conda-root-py"
19 | },
20 | "language_info": {
21 | "codemirror_mode": {
22 | "name": "ipython",
23 | "version": 3
24 | },
25 | "file_extension": ".py",
26 | "mimetype": "text/x-python",
27 | "name": "python",
28 | "nbconvert_exporter": "python",
29 | "pygments_lexer": "ipython3",
30 | "version": "3.5.2"
31 | }
32 | },
33 | "nbformat": 4,
34 | "nbformat_minor": 1
35 | }
36 |
--------------------------------------------------------------------------------
/VAE/vae_sample.py:
--------------------------------------------------------------------------------
1 | """
2 | Arxiv Insights: https://www.youtube.com/watch?v=9zKuYvjFFS8&t=609s
3 | https://github.com/hwalsuklee/tensorflow-mnist-CVAE
4 | """
5 |
6 | import tensorflow as tf
7 |
8 | # Gaussian MLP as conditional encoder
9 | def gaussian_MLP_conditional_encoder(x, y, n_hidden, n_output, keep_prob):
10 | with tf.variable_scope("gaussian_MLP_encoder"):
11 | # concatenate condition and image
12 | dim_y = int(y.get_shape()[1])
13 | input = tf.concat(axis=1, values=[x, y])
14 |
15 | # initializers
16 | w_init = tf.contrib.layers.variance_scaling_initializer()
17 | b_init = tf.constant_initializer(0.)
18 |
19 | # 1st hidden layer
20 | w0 = tf.get_variable('w0', [input.get_shape()[1], n_hidden+dim_y], initializer=w_init)
21 | b0 = tf.get_variable('b0', [n_hidden+dim_y], initializer=b_init)
22 | h0 = tf.matmul(input, w0) + b0
23 | h0 = tf.nn.elu(h0)
24 | h0 = tf.nn.dropout(h0, keep_prob)
25 |
26 | # 2nd hidden layer
27 | w1 = tf.get_variable('w1', [h0.get_shape()[1], n_hidden], initializer=w_init)
28 | b1 = tf.get_variable('b1', [n_hidden], initializer=b_init)
29 | h1 = tf.matmul(h0, w1) + b1
30 | h1 = tf.nn.tanh(h1)
31 | h1 = tf.nn.dropout(h1, keep_prob)
32 |
33 | # output layer
34 |         # borrowed from https://github.com/altosaar/vae/blob/master/vae.py
35 | wo = tf.get_variable('wo', [h1.get_shape()[1], n_output * 2], initializer=w_init)
36 | bo = tf.get_variable('bo', [n_output * 2], initializer=b_init)
37 |
38 | gaussian_params = tf.matmul(h1, wo) + bo
39 |
40 | # The mean parameter is unconstrained
41 | mean = gaussian_params[:, :n_output]
42 | # The standard deviation must be positive. Parametrize with a softplus and
43 | # add a small epsilon for numerical stability
44 | stddev = 1e-6 + tf.nn.softplus(gaussian_params[:, n_output:])
45 |
46 | return mean, stddev
47 |
48 | # Bernoulli MLP as conditional decoder
49 | def bernoulli_MLP_conditional_decoder(z, y, n_hidden, n_output, keep_prob, reuse=False):
50 |
51 | with tf.variable_scope("bernoulli_MLP_decoder", reuse=reuse):
52 | # concatenate condition and latent vectors
53 | input = tf.concat(axis=1, values=[z, y])
54 |
55 | # initializers
56 | w_init = tf.contrib.layers.variance_scaling_initializer()
57 | b_init = tf.constant_initializer(0.)
58 |
59 | # 1st hidden layer
60 | w0 = tf.get_variable('w0', [input.get_shape()[1], n_hidden], initializer=w_init)
61 | b0 = tf.get_variable('b0', [n_hidden], initializer=b_init)
62 | h0 = tf.matmul(input, w0) + b0
63 | h0 = tf.nn.tanh(h0)
64 | h0 = tf.nn.dropout(h0, keep_prob)
65 |
66 | # 2nd hidden layer
67 | w1 = tf.get_variable('w1', [h0.get_shape()[1], n_hidden], initializer=w_init)
68 | b1 = tf.get_variable('b1', [n_hidden], initializer=b_init)
69 | h1 = tf.matmul(h0, w1) + b1
70 | h1 = tf.nn.elu(h1)
71 | h1 = tf.nn.dropout(h1, keep_prob)
72 |
73 | # output layer-mean
74 | wo = tf.get_variable('wo', [h1.get_shape()[1], n_output], initializer=w_init)
75 | bo = tf.get_variable('bo', [n_output], initializer=b_init)
76 | y = tf.sigmoid(tf.matmul(h1, wo) + bo)
77 |
78 | return y
79 |
80 | # Gateway
81 | def autoencoder(x_hat, x, y, dim_img, dim_z, n_hidden, keep_prob):
82 |
83 | # encoding
84 | mu, sigma = gaussian_MLP_conditional_encoder(x_hat, y, n_hidden, dim_z, keep_prob)
85 |
86 | # sampling by re-parameterization technique
87 | z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32) #Latent vector
88 |
89 | # decoding
90 | x_ = bernoulli_MLP_conditional_decoder(z, y, n_hidden, dim_img, keep_prob)
91 | x_ = tf.clip_by_value(x_, 1e-8, 1 - 1e-8)
92 |
93 | # ELBO
94 | marginal_likelihood = tf.reduce_sum(x * tf.log(x_) + (1 - x) * tf.log(1 - x_), 1)
95 | KL_divergence = 0.5 * tf.reduce_sum(tf.square(mu) + tf.square(sigma) - tf.log(1e-8 + tf.square(sigma)) - 1, 1)
96 |
97 | marginal_likelihood = tf.reduce_mean(marginal_likelihood)
98 | KL_divergence = tf.reduce_mean(KL_divergence)
99 |
100 | ELBO = marginal_likelihood - KL_divergence
101 |
102 | # minimize loss instead of maximizing ELBO
103 | loss = -ELBO
104 |
105 | return x_, z, loss, -marginal_likelihood, KL_divergence
106 |
107 | # Conditional Decoder (Generator)
108 | def decoder(z, y, dim_img, n_hidden):
109 |
110 | x_ = bernoulli_MLP_conditional_decoder(z, y, n_hidden, dim_img, 1.0, reuse=True)
111 |
112 | return x_
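
A minimal sketch of plugging the pieces together (not part of the file). The shapes are illustrative assumptions for MNIST-style data (784-dimensional flattened images, 10 one-hot classes); dim_z and n_hidden are arbitrary choices here:

    import tensorflow as tf
    import vae_sample

    dim_img, dim_z, n_hidden = 28 * 28, 2, 500

    x_hat = tf.placeholder(tf.float32, shape=[None, dim_img])   # (possibly noised) encoder input
    x = tf.placeholder(tf.float32, shape=[None, dim_img])       # reconstruction target
    y = tf.placeholder(tf.float32, shape=[None, 10])            # one-hot condition
    keep_prob = tf.placeholder(tf.float32)

    x_, z, loss, neg_likelihood, kl = vae_sample.autoencoder(
        x_hat, x, y, dim_img, dim_z, n_hidden, keep_prob)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)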
--------------------------------------------------------------------------------
/dataset/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainmaker712/nlp_ryan/108ce890659ed29d4a143e41e5546f613aa878ca/dataset/.DS_Store
--------------------------------------------------------------------------------
/pytorch_basic/.ipynb_checkpoints/10.pytorch_rnn-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 1
6 | }
7 |
--------------------------------------------------------------------------------
/pytorch_basic/.ipynb_checkpoints/sec 6. Linear regression wih Python-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {
7 | "collapsed": false
8 | },
9 | "outputs": [
10 | {
11 | "ename": "NameError",
12 | "evalue": "name 'np' is not defined",
13 | "output_type": "error",
14 | "traceback": [
15 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
16 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
17 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
18 | "\u001b[0;31mNameError\u001b[0m: name 'np' is not defined"
19 | ]
20 | }
21 | ],
22 | "source": [
23 | "np.random.seed(1)\n",
24 | "\n",
25 | "x= np.random.rand(n)\n",
26 | "y = x ** np.random.rand(n)\n",
27 | "\n",
28 | "colors = np.random.rand()\n",
29 |     "plt.plot(np.unique(x), np.poly1d(np.polyfit(x,y,1))(np.unique(x)))\n",
30 | "\n",
31 | "plt.scatter(x,y, colors, alpha=0.5)\n",
32 | "plt.show()"
33 | ]
34 | }
35 | ],
36 | "metadata": {
37 | "anaconda-cloud": {},
38 | "kernelspec": {
39 | "display_name": "Python [conda root]",
40 | "language": "python",
41 | "name": "conda-root-py"
42 | },
43 | "language_info": {
44 | "codemirror_mode": {
45 | "name": "ipython",
46 | "version": 3
47 | },
48 | "file_extension": ".py",
49 | "mimetype": "text/x-python",
50 | "name": "python",
51 | "nbconvert_exporter": "python",
52 | "pygments_lexer": "ipython3",
53 | "version": "3.5.2"
54 | }
55 | },
56 | "nbformat": 4,
57 | "nbformat_minor": 1
58 | }
59 |
--------------------------------------------------------------------------------
/pytorch_basic/10.pytorch_rnn.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": []
11 | }
12 | ],
13 | "metadata": {
14 | "anaconda-cloud": {},
15 | "kernelspec": {
16 | "display_name": "Python [conda root]",
17 | "language": "python",
18 | "name": "conda-root-py"
19 | },
20 | "language_info": {
21 | "codemirror_mode": {
22 | "name": "ipython",
23 | "version": 3
24 | },
25 | "file_extension": ".py",
26 | "mimetype": "text/x-python",
27 | "name": "python",
28 | "nbconvert_exporter": "python",
29 | "pygments_lexer": "ipython3",
30 | "version": "3.5.2"
31 | }
32 | },
33 | "nbformat": 4,
34 | "nbformat_minor": 1
35 | }
36 |
--------------------------------------------------------------------------------
/pytorch_basic/Start_Pytorch.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from torch.autograd import Variable\n",
10 | "import torch"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 4,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "torch.add?"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": []
28 | }
29 | ],
30 | "metadata": {
31 | "kernelspec": {
32 | "display_name": "Python 3",
33 | "language": "python",
34 | "name": "python3"
35 | },
36 | "language_info": {
37 | "codemirror_mode": {
38 | "name": "ipython",
39 | "version": 3
40 | },
41 | "file_extension": ".py",
42 | "mimetype": "text/x-python",
43 | "name": "python",
44 | "nbconvert_exporter": "python",
45 | "pygments_lexer": "ipython3",
46 | "version": "3.6.3"
47 | }
48 | },
49 | "nbformat": 4,
50 | "nbformat_minor": 2
51 | }
52 |
--------------------------------------------------------------------------------
/pytorch_basic/cnn_cifar10_pytorch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sat Jun 10 15:48:47 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | #-----------CNN------------------#
10 | import torch
11 | import torchvision
12 | import torchvision.transforms as transforms
13 |
14 | transform = transforms.Compose(
15 | [transforms.ToTensor(),
16 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
17 |
18 | trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
19 | download=True, transform=transform)
20 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
21 | shuffle=True, num_workers=2)
22 |
23 | testset = torchvision.datasets.CIFAR10(root='./data', train=False,
24 | download=True, transform=transform)
25 | testloader = torch.utils.data.DataLoader(testset, batch_size=4,
26 | shuffle=False, num_workers=2)
27 |
28 | classes = ('plane', 'car', 'bird', 'cat',
29 | 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
30 |
31 | import matplotlib.pyplot as plt
32 | import numpy as np
33 |
34 | # functions to show an image
35 |
36 | def imshow(img):
37 | img = img / 2 + 0.5 # unnormalize
38 | npimg = img.numpy()
39 | plt.imshow(np.transpose(npimg, (1, 2, 0)))
40 |
41 |
42 | # get some random training images
43 | dataiter = iter(trainloader)
44 | images, labels = dataiter.next()
45 |
46 | # show images
47 | imshow(torchvision.utils.make_grid(images))
48 | # print labels
49 | print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
50 |
51 | #1. Loading and normalizing cifar10
52 |
53 | #2. Define a Convolution Neural Network
54 | from torch.autograd import Variable
55 | import torch.nn as nn
56 | import torch.nn.functional as F
57 |
58 | class Net(nn.Module):
59 | def __init__(self):
60 | super(Net, self).__init__()
61 | self.conv1 = nn.Conv2d(3, 6, 5)
62 | self.pool = nn.MaxPool2d(2,2)
63 | self.conv2 = nn.Conv2d(6, 16, 5)
64 | self.fc1 = nn.Linear(16 * 5 * 5, 120)
65 | self.fc2 = nn.Linear(120, 84)
66 | self.fc3 = nn.Linear(84, 10)
67 |
68 | def forward(self, x):
69 | x = self.pool(F.relu(self.conv1(x)))
70 | x = self.pool(F.relu(self.conv2(x)))
71 | x = x.view(-1, 16 * 5 * 5)
72 | x = F.relu(self.fc1(x))
73 | x = F.relu(self.fc2(x))
74 | x = self.fc3(x)
75 | return x
76 |
77 | net = Net()
78 |
79 | #3. Define a Loss function and optimizer
80 |
81 | #Cross-Entropy and SGD with momentum
82 | import torch.optim as optim
83 |
84 | criterion = nn.CrossEntropyLoss()
85 | optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
86 |
87 | #4. train the network
88 | net.cuda()
89 |
90 | for epoch in range(1000): # loop over the dataset multiple times
91 |
92 | running_loss = 0.0
93 | for i, data in enumerate(trainloader, 0):
94 | # get the inputs
95 | inputs, labels = data
96 |
97 | # wrap them in Variable
98 | #inputs, labels = Variable(inputs), Variable(labels) #CPU Ver
99 | inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda()) #GPU Ver
100 |
101 | # zero the parameter gradients
102 | optimizer.zero_grad()
103 |
104 | # forward + backward + optimize
105 | outputs = net(inputs)
106 | loss = criterion(outputs, labels)
107 | loss.backward()
108 | optimizer.step()
109 |
110 | # print statistics
111 | running_loss += loss.data[0]
112 | if i % 2000 == 1999: # print every 2000 mini-batches
113 | print('[%d, %5d] loss: %.3f' %
114 | (epoch + 1, i + 1, running_loss / 2000))
115 | running_loss = 0.0
116 |
117 | print('Finished Training')
118 |
119 | #5. Test the network on the test data
120 | dataiter = iter(testloader)
121 | images, labels = dataiter.next()
122 | #print image
123 | imshow(torchvision.utils.make_grid(images))
124 | print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))
125 |
126 | outputs = net(Variable(images.cuda()))  # the net was moved to the GPU, so move the test images too
127 |
128 | _, predicted = torch.max(outputs.data, 1)
129 | print('Predicted: ', ' '.join('%5s' % classes[predicted[j][0]]
130 | for j in range(4)))
131 |
132 | #Performance Test
133 | correct = 0
134 | total = 0
135 | for data in testloader:
136 | images, labels = data
137 |     outputs = net(Variable(images.cuda()))
138 | _, predicted = torch.max(outputs.data, 1)
139 | total += labels.size(0)
140 |     correct += (predicted.cpu() == labels).sum()
141 |
142 | print('Accuracy of the network on the 10000 test images: %d %%' % (
143 | 100 * correct / total))
144 |
145 | # Break the accuracy down per class: which classes are predicted correctly and which are not
146 | class_correct = list(0. for i in range(10))
147 | class_total = list(0. for i in range(10))
148 | for data in testloader:
149 | images, labels = data
150 |     outputs = net(Variable(images.cuda()))
151 | _, predicted = torch.max(outputs.data, 1)
152 |     c = (predicted.cpu() == labels).squeeze()
153 | for i in range(4):
154 | label = labels[i]
155 | class_correct[label] += c[i]
156 | class_total[label] += 1
157 |
158 | for i in range(10):
159 | print('Accuracy of %5s : %2d %%' % (
160 | classes[i], 100 * class_correct[i] / class_total[i]))
161 |
162 |
163 |
164 |
165 |
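
A minimal sketch (not in the script) for persisting the trained network with the standard torch.save / load_state_dict API; the file name is arbitrary:

    # save only the learned parameters
    torch.save(net.state_dict(), 'cifar10_cnn.pth')

    # later: rebuild the architecture and load the weights back
    net2 = Net()
    net2.load_state_dict(torch.load('cifar10_cnn.pth'))
    net2.cuda()  # move back to the GPU if evaluating there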
--------------------------------------------------------------------------------
/pytorch_basic/pytorch_basic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Jun 6 16:24:52 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | """Pytorch Intro"""
10 |
11 | import torch
12 |
13 | import os
14 | os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"
15 |
16 | #GPU
17 | dtype = torch.cuda.FloatTensor
18 |
19 | ##Tensors
20 | x = torch.Tensor(5,3).type(dtype)
21 | x = torch.rand(5,3).type(dtype)
22 | x.size()
23 |
24 | ##Operations
25 | y = torch.rand(5,3).type(dtype)
26 | print(x+y)
27 |
28 | #print(torch.add(x,y))
29 | result = torch.Tensor(5,3).type(dtype)
30 | torch.add(x, y, out=result)
31 | print(result)
32 |
33 | #Indexing
34 | print(x[:, 1])
35 |
36 | ##Numpy Bridge
37 |
38 | #Convert torch Tensor to numpy Array
39 | a = torch.ones(5)
40 | print(a)
41 |
42 | b = a.numpy()
43 | print(b)
44 |
45 | #Convert numpy array to torch
46 | import numpy as np
47 | a = np.ones(5)
48 | b = torch.from_numpy(a)
49 | np.add(a, 1, out=a)
50 | print(a)
51 | print(b)
52 |
53 | #Cuda Tensors
54 | if torch.cuda.is_available():
55 | x = x.cuda()
56 | y = y.cuda()
57 | x + y
58 |
59 |
60 | """ Autograd: Automatic differentiation """
61 |
62 | ##Variable
63 | # If the Variable is not a scalar, you need to pass a gradient argument to backward()
64 |
65 | import torch
66 | from torch.autograd import Variable
67 |
68 | x = Variable(torch.ones(2,2).type(dtype), requires_grad=True)  # cast the tensor before wrapping so x stays a leaf Variable and keeps its grad
69 | y = x + 2
70 | print(y)
71 |
72 | z = y * y * 3
73 | out = z.mean()
74 | print(z, out)
75 |
76 |
77 | ##Gradients
78 | out.backward()
79 |
80 | print(x.grad)
81 |
82 | import time
83 | from datetime import timedelta
84 |
85 | start_time = time.monotonic()
86 | x = torch.randn(3)
87 | x = Variable(x, requires_grad=True)
88 | y = x*2
89 | while y.data.norm() < 1000000:
90 | y = y * 2
91 | end_time = time.monotonic()
92 |
93 | print(timedelta(seconds=end_time - start_time))
94 |
95 | gradients = torch.FloatTensor([0.1, 1.0, 0.0001])
96 | y.backward(gradients)
97 |
98 | print(x.grad)
99 |
100 |
101 |
102 |
--------------------------------------------------------------------------------
/pytorch_basic/pytorch_nlp.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Jun 6 17:37:50 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | import torch
10 | import torch.autograd as autograd
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | import torch.optim as optim
14 |
15 | torch.manual_seed(1)
16 |
17 | #Creating Tensors
18 | V_data = [1,2,3]
19 | V = torch.Tensor(V_data)
20 | print(V)
21 |
22 | #Create matrix
23 | M_data = [[1,2,3], [4,5,6]]
24 | M = torch.Tensor(M_data)
25 | print(M)
26 |
27 | # Create 3D tensor of size 2*2*2
28 | T_data = [[[1,2],[3,4]],
29 | [[5,6],[7,8]]]
30 | T = torch.Tensor(T_data)
31 | print(T)
32 |
33 | # Index into V and get a scalar
34 | print(V[0])
35 |
36 | # Index into M and get a vector
37 | print(M[0])
38 |
39 | # Index into T and get a matrix
40 | print(T[0])
41 |
42 | x = torch.randn((3, 4, 5))
43 | print(x)
44 |
45 | ##Operations with Tensors
46 | x = torch.Tensor([1., 2., 3.])
47 | y = torch.Tensor([4., 5., 6.])
48 | z = x + y
49 | print(z)
50 |
51 | ##Concat
52 | # By default, it concatenates along the first axis (concatenates rows)
53 | x_1 = torch.randn(2, 5)
54 | y_1 = torch.randn(3, 5)
55 | z_1 = torch.cat([x_1, y_1])
56 | print(z_1)
57 |
58 | # Concatenate columns:
59 | x_2 = torch.randn(2, 3)
60 | y_2 = torch.randn(2, 5)
61 | # second arg specifies which axis to concat along
62 | z_2 = torch.cat([x_2, y_2], 1)
63 | print(z_2)
64 |
65 | # If your tensors are not compatible, torch will complain. Uncomment to see the error
66 | # torch.cat([x_1, x_2])
67 |
68 |
69 | ##Reshaping Tensors
70 | x = torch.randn(2,3,4)
71 | print(x)
72 | print(x.view(2,12)) #2rows with 12 col.
73 | print(x.view(2,-1)) #Same, If one of the dim. is -1, its size can be inferred
74 |
75 | # Computation graphs and autograd: how your data gets combined
76 |
77 | # Variables wrap tensor objects
78 | x = autograd.Variable(torch.Tensor([1., 2., 3]), requires_grad=True)
79 | # You can access the data with the .data attribute
80 | print(x.data)
81 |
82 | # You can also do all the same operations you did with tensors with Variables.
83 | y = autograd.Variable(torch.Tensor([4., 5., 6]), requires_grad=True)
84 | z = x + y
85 | print(z.data)
86 |
87 | # BUT z knows something extra.
88 | #print(z.grad_fn) does not work
89 |
90 | s = z.sum()
91 | print(s)
92 | #print(s.grad_fn) does not work
93 |
94 | s.backward()
95 | print(x.grad)
96 |
97 | ##Summary
98 |
99 | x = torch.randn((2,2))
100 | y = torch.randn((2,2))
101 |
102 | z= x + y
103 |
104 | var_x = autograd.Variable(x)
105 | var_y = autograd.Variable(y)
106 |
107 | var_z = var_x + var_y
108 | print(var_z.grad_fn)
109 |
110 | var_z_data = var_z.data # Get the wrapped Tensor object out of var_z...
111 | new_var_z = autograd.Variable(var_z_data)
112 |
113 | print(new_var_z.grad_fn)
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
--------------------------------------------------------------------------------
/pytorch_basic/pytorch_nlp2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Jun 6 17:37:50 2017
5 |
6 | @author: ryan
7 | """
8 |
9 | import torch
10 | import torch.autograd as autograd
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | import torch.optim as optim
14 |
15 | torch.manual_seed(1)
16 |
17 | lin = nn.Linear(5,3)
18 | data = autograd.Variable(torch.randn(2,5))
19 | # transforms the dimensionality (5 inputs -> 3 outputs)
20 | print(lin(data))
21 |
22 | #Non linearity
23 | print(data)
24 | print(F.relu(data))
25 |
26 | # Softmax is also in torch.functional
27 | data = autograd.Variable(torch.randn(5))
28 | print(data)
29 | print(F.softmax(data))
30 | print(F.softmax(data).sum()) # Sums to 1 because it is a distribution!
31 | print(F.log_softmax(data)) # theres also log_softmax
32 |
33 | # Bag-of-words (BoW) model practice
34 |
35 | data = [("me gusta comer en la cafeteria".split(), "SPANISH"),
36 | ("Give it to me".split(), "ENGLISH"),
37 | ("No creo que sea una buena idea".split(), "SPANISH"),
38 | ("No it is not a good idea to get lost at sea".split(), "ENGLISH")]
39 |
40 | test_data = [("Yo creo que si".split(), "SPANISH"),
41 | ("it is lost on me".split(), "ENGLISH")]
42 |
43 | # word_to_ix maps each word in the vocab to a unique integer, which will be its
44 | # index into the Bag of words vector
45 | word_to_ix = {}
46 | for sent, _ in data + test_data:
47 | for word in sent:
48 | if word not in word_to_ix:
49 | word_to_ix[word] = len(word_to_ix)
50 | print(word_to_ix)
51 |
52 | VOCAB_SIZE = len(word_to_ix)
53 | NUM_LABELS = 2
54 |
55 | class BoWClassifier(nn.Module): #inheriting from nn.Module!
56 | def __init__(self, num_labels, vocab_size):
57 |         # calls the init function of nn.Module. Don't get confused by the syntax,
58 | # just always do it in an nn.Module
59 | super(BoWClassifier, self).__init__()
60 |         """
61 |         When you inherit, you can access the parent class's attributes and methods
62 |         by explicitly naming the parent class as a qualifier, but super() lets you
63 |         reach the parent class without hard-coding its name. super is itself a class:
64 |         going through it, you can access and use the parent class's attributes and
65 |         methods. It is mainly used in overrides to refer to the parent's attributes or methods.
66 |         """
67 |
68 | # Define the parameters that you will need. In this case, we need A and b,
69 | # the parameters of the affine mapping.
70 | # Torch defines nn.Linear(), which provides the affine map.
71 | # Make sure you understand why the input dimension is vocab_size
72 | # and the output is num_labels!
73 | self.linear = nn.Linear(vocab_size, num_labels)
74 |
75 | # NOTE! The non-linearity log softmax does not have parameters! So we don't need
76 | # to worry about that here
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
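
The script stops before BoWClassifier gets a forward pass. A minimal sketch of one way to finish it (restating the class with a forward method plus the usual bag-of-words vectorizer; nothing below is in the original file):

    class BoWClassifier(nn.Module):
        def __init__(self, num_labels, vocab_size):
            super(BoWClassifier, self).__init__()
            self.linear = nn.Linear(vocab_size, num_labels)

        def forward(self, bow_vec):
            # affine map, then log softmax over the two language labels
            return F.log_softmax(self.linear(bow_vec))

    def make_bow_vector(sentence, word_to_ix):
        vec = torch.zeros(len(word_to_ix))
        for word in sentence:
            vec[word_to_ix[word]] += 1
        return vec.view(1, -1)

    model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)
    sample_sentence, _ = data[0]
    log_probs = model(autograd.Variable(make_bow_vector(sample_sentence, word_to_ix)))
    print(log_probs)   # log probabilities over SPANISH / ENGLISH before any training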
--------------------------------------------------------------------------------
/pytorch_basic/pytorch_nlp3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sat Jun 10 11:48:58 2017
5 |
6 | @author: ryan
7 | http://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html
8 |
9 | """
10 |
11 | import torch
12 | import torch.autograd as autograd
13 | import torch.nn as nn
14 | import torch.nn.functional as F
15 | import torch.optim as optim
16 |
17 | torch.manual_seed(1)
18 |
19 | word_to_ix = {"안녕": 0, "반가워": 1}
20 | embeds = nn.Embedding(2, 5) # 2 words in vocab, 5 dimensional embeddings
21 | lookup_tensor = torch.LongTensor([word_to_ix["안녕"]])
22 | hello_embed = embeds(autograd.Variable(lookup_tensor))
23 | print(hello_embed)
24 |
25 | ##
26 | CONTEXT_SIZE = 2
27 | EMBEDDING_DIM = 10
28 |
29 | # The original tutorial uses Shakespeare Sonnet 2; here the corpus is a Korean news article instead
30 | test_sentence = """미국 로스앤젤레스에 사는 배우 척 매카시는 사람들과 산책을 해주고 돈을 번다. 지난해 그가 시작한 '친구 대여(Rent-a-Friend)'는 새로운 형태의 비즈니스다. 매카시는 일감이 많지 않은 무명 배우였지만 이 부업은 조수들을 고용해야 할 만큼 번창하고 있다. 다른 도시와 외국에서도 '출장 산책' 주문이 쇄도한다.
31 |
32 | 매카시는 집 근처 공원과 거리를 고객과 함께 걸으면서 이야기를 나누는 대가로 1마일(1.6㎞)에 7달러를 받는다. 사회적 관계를 구매 가능한 상품으로 포장한 셈이다. 이름 붙이자면 '고독 비즈니스'다. 그는 영국 일간지 가디언과의 인터뷰에서 "혼자 산책하기 두렵거나 친구 없는 사람으로 비칠까봐 걱정하는 사람이 많았다"며 "자기 이야기를 누가 들어준다는 데 기뻐하며 다시 나를 찾는다"고 했다.
33 |
34 | 20~30대에서는 미혼과 만혼(晩婚), 40대 이후로는 이혼과 고령화 등으로 1인 가구가 빠르게 늘어가는 한국 사회에서 고독은 강 건너 불구경이 아니다. 우리는 페이스북·트위터·인스타그램 같은 소셜미디어로 긴밀하게 연결돼 있지만 관계의 응집력은 어느 때보다 느슨하다. '혼밥' '혼술' '혼영(나 홀로 영화)' '혼행(나 홀로 여행)' 같은 소비 패턴이 방증한다. 외로움을 감추기보다 즐기려는 경향도 나타난다. Why?는 예스24에 의뢰해 지난 1~5일 설문조사를 했다. 5864명(여성 4398명)이 응답했다. 고독을 바라보는 한국인의 태도가 드러났다.
35 | """.split()
36 | # we should tokenize the input, but we will ignore that for now
37 | # build a list of tuples. Each tuple is ([ word_i-2, word_i-1 ], target word)
38 |
39 | trigrams = [([test_sentence[i], test_sentence[i + 1]], test_sentence[i + 2])
40 | for i in range(len(test_sentence) -2)]
41 |
42 | # Deduplicate the words and map each unique word to an integer index
43 | vocab = set(test_sentence)
44 | word_to_ix = {word: i for i , word in enumerate(vocab)}
45 |
46 | #https://wikidocs.net/28
47 |
48 | class NGramLanguageModeler(nn.Module):
49 | def __init__(self, vocab_size, embedding_dim, context_size):
50 | super(NGramLanguageModeler, self).__init__()
51 | self.embeddings = nn.Embedding(vocab_size, embedding_dim)
52 | self.linear1 = nn.Linear(context_size * embedding_dim, 128)
53 | self.linear2 = nn.Linear(128, vocab_size)
54 |
55 | def forward(self, inputs):
56 | embeds = self.embeddings(inputs).view((1,-1))
57 | out = F.relu(self.linear1(embeds))
58 | out = self.linear2(out)
59 | log_probs = F.log_softmax(out)
60 | return log_probs
61 |
62 | losses = []
63 | loss_function = nn.NLLLoss()
64 | model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)
65 | optimizer = optim.SGD(model.parameters(), lr = 0.001)
66 |
67 | for epoch in range(100):
68 | total_loss = torch.Tensor([0])
69 | for context, target in trigrams:
70 |
71 |         # Step 1: prepare the inputs (turn the context words into integer indices and wrap them in a Variable)
72 |         context_idxs = [word_to_ix[w] for w in context]
73 |         context_var = autograd.Variable(torch.LongTensor(context_idxs))
74 | 
75 |         # Step 2: torch accumulates gradients, so before passing in a new instance
76 |         # we have to zero out the gradients from the old instance
77 |         model.zero_grad()
78 | 
79 |         # Step 3: run the forward pass, getting log probabilities over the next word
80 |         log_probs = model(context_var)
81 | 
82 |         # Step 4: compute the loss (negative log likelihood of the target word)
83 |         loss = loss_function(log_probs, autograd.Variable(
84 |             torch.LongTensor([word_to_ix[target]])))
85 | 
86 |         # Step 5: run backpropagation and update the parameters
87 |         loss.backward()
88 |         optimizer.step()
89 |
90 | total_loss += loss.data
91 | losses.append(total_loss)
92 | print(losses)
93 |
94 |
95 | """Exercise: CBow"""
96 | #.view() check
97 | CONTEXT_SIZE = 2 # 2 words to the left, 2 to the right
98 | raw_text = """We are about to study the idea of a computational process.
99 | Computational processes are abstract beings that inhabit computers.
100 | As they evolve, processes manipulate other abstract things called data.
101 | The evolution of a process is directed by a pattern of rules
102 | called a program. People create programs to direct processes. In effect,
103 | we conjure the spirits of the computer with our spells.""".split()
104 |
105 | # By deriving a set from `raw_text`, we deduplicate the array
106 | vocab = set(raw_text)
107 | vocab_size = len(vocab)
108 |
109 | word_to_ix = {word: i for i, word in enumerate(vocab)}
110 | data = []
111 |
112 | for i in range(2, len(raw_text) - 2):
113 | context = [raw_text[i - 2], raw_text[i - 1],
114 | raw_text[i + 1], raw_text[i + 2]]
115 | target = raw_text[i]
116 | data.append((context, target))
117 |
118 | class CBOW(nn.Module):
119 |
120 | def __init__(self):
121 | pass
122 |
123 | def forward(self, inputs):
124 | pass
125 |
126 | def make_context_vector(context, word_to_ix):
127 | idxs = [word_to_ix[w] for w in context]
128 | tensor = torch.LongTensor(idxs)
129 | return autograd.Variable(tensor)
130 |
131 | make_context_vector(data[0][0], word_to_ix)
132 |
133 |
134 |
135 |
136 |
137 |
138 |
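
The CBOW class above is intentionally left as an exercise. A minimal sketch of one common way to fill it in (sum the context embeddings, then a linear layer over the vocabulary); the class name is changed so it reads as an illustration, and it reuses vocab_size, EMBEDDING_DIM, data, word_to_ix and make_context_vector defined above:

    class CBOWSketch(nn.Module):
        def __init__(self, vocab_size, embedding_dim):
            super(CBOWSketch, self).__init__()
            self.embeddings = nn.Embedding(vocab_size, embedding_dim)
            self.linear = nn.Linear(embedding_dim, vocab_size)

        def forward(self, inputs):
            # inputs: LongTensor of context word indices, shape (context_len,)
            embeds = self.embeddings(inputs).sum(0).view(1, -1)
            return F.log_softmax(self.linear(embeds))

    cbow = CBOWSketch(vocab_size, EMBEDDING_DIM)
    context_var = make_context_vector(data[0][0], word_to_ix)
    print(cbow(context_var))   # log probabilities over the vocabulary for the first context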
--------------------------------------------------------------------------------
/pytorch_basic/pytorch_seq2seq(LSTM).py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sat Jun 10 18:06:39 2017
5 | # Author: Robert Guthrie
6 | http://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html
7 | """
8 |
9 | import torch
10 | import torch.autograd as autograd
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | import torch.optim as optim
14 |
15 | torch.manual_seed(1)
16 |
17 | lstm = nn.LSTM(3,3) #Input dim, output dim (3,3)
18 | inputs = [autograd.Variable(torch.randn((1, 3)))
19 | for _ in range(5)] # make a sequence of length 5
20 |
21 | # initialize the hidden state
22 | hidden = (autograd.Variable(torch.randn(1,1,3)),
23 | autograd.Variable(torch.randn(1,1,3)))
24 |
25 | for i in inputs:
26 |     # Step through the sequence one element at a time.
27 | # after each step, hidden contains the hidden state
28 | out, hidden = lstm(i.view(1,1,-1), hidden)
29 |
30 | # Alternatively, we can run the whole sequence through the LSTM at once.
31 | # The first value returned by the LSTM is all of the hidden states over the sequence;
32 | # the second is the most recent hidden state.
33 | # "out" gives access to every hidden state in the sequence,
34 | # while "hidden" lets us continue the sequence and backpropagate later.
35 | inputs = torch.cat(inputs).view(len(inputs), 1, -1)
36 | hidden = (autograd.Variable(torch.randn(1, 1, 3)), autograd.Variable(
37 |     torch.randn(1,1,3)))
38 | out, hidden = lstm(inputs, hidden)
39 | print(out)
40 | print(hidden)
41 |
42 | """LSTM for POS Tagging
43 |
44 | """
45 |
46 | def prepare_sequence(seq, to_ix):
47 | idxs = [to_ix[w] for w in seq]
48 | tensor = torch.LongTensor(idxs)
49 | return autograd.Variable(tensor)
50 |
51 | training_data = [
52 | ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
53 | ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
54 | ]
55 |
56 | word_to_ix = {}
57 | for sent, tags in training_data:
58 | for word in sent:
59 | if word not in word_to_ix:
60 | word_to_ix[word] = len(word_to_ix)
61 |
62 | tag_to_ix = {"DET": 0, "NN": 1, "V": 2}
63 |
64 | # Embedding/hidden sizes are usually around 32~64; they are kept small here so we can see how the values change as training progresses
65 | EMBEDDING_DIM = 6
66 | HIDDEN_DIM = 6
67 |
68 | #Create the Model
69 | class LSTMTagger(nn.Module):
70 |
71 | def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
72 | super(LSTMTagger, self).__init__()
73 | self.hidden_dim = hidden_dim
74 |
75 | self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
76 |
77 | #LSTM -> input: word embeddings / output: hidden state / dim: hidden_dim
78 | self.lstm = nn.LSTM(embedding_dim, hidden_dim)
79 |
80 |         # The linear layer maps from hidden state space to tag space
81 | self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
82 | self.hidden = self.init_hidden()
83 |
84 | def init_hidden(self):
85 | # The axes semantics are (num_layers, mini_batch_size, hidden_dim)
86 | return (autograd.Variable(torch.zeros(1,1, self.hidden_dim)),
87 | autograd.Variable(torch.zeros(1,1, self.hidden_dim)))
88 |
89 | def forward(self, sentence):
90 | embeds = self.word_embeddings(sentence)
91 | lstm_out, self.hidden = self.lstm(
92 | embeds.view(len(sentence), 1, -1), self.hidden)
93 | tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
94 |         tag_scores = F.log_softmax(tag_space)
96 | return tag_scores
97 |
98 | #Training Model
99 | model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
100 | loss_function = nn.NLLLoss()
101 | optimizer = optim.SGD(model.parameters(), lr=0.1)
102 |
103 | # Check the scores before training: element (i, j) is the score of tag j for word i
104 | inputs = prepare_sequence(training_data[0][0], word_to_ix)
105 | tag_scores = model(inputs)
106 | print(tag_scores)
107 |
108 | for epoch in range(300): # only 300 epochs because this is toy data; normally you would train longer
109 | for sentence, tags in training_data:
110 |         # Step 1: PyTorch accumulates gradients, so clear them out before each instance
111 | model.zero_grad()
112 |
113 |         # We also need to clear out the hidden state of the LSTM,
114 |         # detaching it from the history of the previous instance
115 | model.hidden = model.init_hidden()
116 |
117 |         # Step 2: turn the input sentence into a tensor of word indices
118 | sentence_in = prepare_sequence(sentence, word_to_ix)
119 | targets = prepare_sequence(tags, tag_to_ix)
120 |
121 | #Step3: Run our forward pass.
122 | tag_scores = model(sentence_in)
123 |
124 | #Step4: Compare the loss, gradients, and update the param. by calling optimizer.step()
125 | loss = loss_function(tag_scores, targets)
126 | loss.backward()
127 | optimizer.step()
128 |
129 | # Check the scores after training
130 | inputs = prepare_sequence(training_data[0][0], word_to_ix)
131 | tag_scores = model(inputs)
132 | print(tag_scores)
133 | # In the output, the predicted sequence (the index with the highest score per word) is 0 1 2 0 1.
134 | # The sentence is "the dog ate the apple."
135 | # That corresponds to DET, NOUN, VERB, DET, NOUN, so the whole sentence is tagged correctly.
136 |
137 |
138 |
139 |
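
As the closing comments note, the prediction for each word is the tag with the highest score. A minimal sketch (not in the file) that turns tag_scores back into tag strings:

    _, predicted = torch.max(tag_scores.data, 1)
    ix_to_tag = {ix: tag for tag, ix in tag_to_ix.items()}
    print([ix_to_tag[int(ix)] for ix in predicted.view(-1)])
    # expected for "The dog ate the apple": ['DET', 'NN', 'V', 'DET', 'NN']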
--------------------------------------------------------------------------------
/pytorch_basic/sec 6. Linear regression wih Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 4,
6 | "metadata": {
7 | "collapsed": false
8 | },
9 | "outputs": [
10 | {
11 | "ename": "NameError",
12 | "evalue": "name 'n' is not defined",
13 | "output_type": "error",
14 | "traceback": [
15 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
16 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
17 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
18 | "\u001b[0;31mNameError\u001b[0m: name 'n' is not defined"
19 | ]
20 | }
21 | ],
22 | "source": [
23 | "import numpy as np\n",
24 | "np.random.seed(1)\n",
25 | "\n",
26 | "x = np.random.rand(n)\n",
27 | "y = x ** np.random.rand(n)\n",
28 | "\n",
29 | "colors = np.random.rand()\n",
30 |     "plt.plot(np.unique(x), np.poly1d(np.polyfit(x,y,1))(np.unique(x)))\n",
31 | "\n",
32 | "plt.scatter(x,y, colors, alpha=0.5)\n",
33 | "plt.show()"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 5,
39 | "metadata": {
40 | "collapsed": true
41 | },
42 | "outputs": [],
43 | "source": [
44 | "# Linear Regression model by pytorch"
45 | ]
46 | }
47 | ],
48 | "metadata": {
49 | "anaconda-cloud": {},
50 | "kernelspec": {
51 | "display_name": "Python [conda root]",
52 | "language": "python",
53 | "name": "conda-root-py"
54 | },
55 | "language_info": {
56 | "codemirror_mode": {
57 | "name": "ipython",
58 | "version": 3
59 | },
60 | "file_extension": ".py",
61 | "mimetype": "text/x-python",
62 | "name": "python",
63 | "nbconvert_exporter": "python",
64 | "pygments_lexer": "ipython3",
65 | "version": "3.5.2"
66 | }
67 | },
68 | "nbformat": 4,
69 | "nbformat_minor": 1
70 | }
71 |
--------------------------------------------------------------------------------
/pytorch_basic/text_loader.py:
--------------------------------------------------------------------------------
1 | # References
2 | # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py
3 | # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class
4 | import gzip
5 | from torch.utils.data import Dataset, DataLoader
6 |
7 |
8 | class TextDataset(Dataset):
9 | # Initialize your data, download, etc.
10 |
11 | def __init__(self, filename="./data/shakespeare.txt.gz"):
12 | self.len = 0
13 | with gzip.open(filename, 'rt') as f:
14 | self.targetLines = [x.strip() for x in f if x.strip()]
15 | self.srcLines = [x.lower().replace(' ', '')
16 | for x in self.targetLines]
17 | self.len = len(self.srcLines)
18 |
19 | def __getitem__(self, index):
20 | return self.srcLines[index], self.targetLines[index]
21 |
22 | def __len__(self):
23 | return self.len
24 |
25 |
26 | # Test the loader
27 | if __name__ == "__main__":
28 | dataset = TextDataset()
29 | train_loader = DataLoader(dataset=dataset,
30 | batch_size=3,
31 | shuffle=True,
32 | num_workers=2)
33 |
34 | for i, (src, target) in enumerate(train_loader):
35 | print(i, "data", src)
--------------------------------------------------------------------------------