├── Pre-trained GloVe.ipynb
├── Pre-trained word-vectors from gensim-data.ipynb
├── README.md
└── data
    ├── r8-test-all-terms.txt
    ├── r8-train-all-terms.txt
    └── readme


/Pre-trained GloVe.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Loading word vectors...\n",
 13 |       "Found 400000 word vectors.\n",
 14 |       "king - man = queen - woman\n",
 15 |       "france - paris = britain - london\n",
 16 |       "france - paris = italy - rome\n",
 17 |       "paris - france = rome - italy\n",
 18 |       "france - french = england - english\n",
 19 |       "japan - japanese = china - chinese\n",
 20 |       "japan - japanese = italy - italian\n",
 21 |       "japan - japanese = australia - australian\n",
 22 |       "december - november = july - june\n",
 23 |       "miami - florida = houston - texas\n",
 24 |       "einstein - scientist = matisse - painter\n",
 25 |       "china - rice = chinese - bread\n",
 26 |       "man - woman = he - she\n",
 27 |       "man - woman = uncle - aunt\n",
 28 |       "man - woman = brother - sister\n",
 29 |       "man - woman = friend - wife\n",
 30 |       "man - woman = actor - actress\n",
 31 |       "man - woman = father - mother\n",
 32 |       "heir - heiress = queen - princess\n",
 33 |       "nephew - niece = uncle - aunt\n",
 34 |       "france - paris = japan - tokyo\n",
 35 |       "france - paris = china - beijing\n",
 36 |       "february - january = october - november\n",
 37 |       "france - paris = italy - rome\n",
 38 |       "paris - france = rome - italy\n"
 39 |      ]
 40 |     }
 41 |    ],
 42 |    "source": [
 43 |     "from __future__ import print_function, division\n",
 44 |     "from future.utils import iteritems\n",
 45 |     "from builtins import range\n",
 46 |     "\n",
 47 |     "import numpy as np\n",
 48 |     "from sklearn.metrics.pairwise import pairwise_distances\n",
 49 |     "\n",
 50 |     "\n",
 51 |     "def dist1(a, b):\n",
 52 |     "    return np.linalg.norm(a - b)\n",
 53 |     "def dist2(a, b):\n",
 54 |     "    return 1 - a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))\n",
 55 |     "\n",
 56 |     "# pick a distance type\n",
 57 |     "dist, metric = dist2, 'cosine'\n",
 58 |     "# dist, metric = dist1, 'euclidean'\n",
 59 |     "\n",
 60 |     "## faster\n",
 61 |     "def find_analogies(w1, w2, w3):\n",
 62 |     "  for w in (w1, w2, w3):\n",
 63 |     "    if w not in word2vec:\n",
 64 |     "      print(\"%s not in dictionary\" % w)\n",
 65 |     "      return\n",
 66 |     "\n",
 67 |     "  king = word2vec[w1]\n",
 68 |     "  man = word2vec[w2]\n",
 69 |     "  woman = word2vec[w3]\n",
 70 |     "  v0 = king - man + woman\n",
 71 |     "\n",
 72 |     "  distances = pairwise_distances(v0.reshape(1, D), embedding, metric=metric).reshape(V)\n",
 73 |     "  idxs = distances.argsort()[:4]\n",
 74 |     "  for idx in idxs:\n",
 75 |     "    word = idx2word[idx]\n",
 76 |     "    if word not in (w1, w2, w3): \n",
 77 |     "      best_word = word\n",
 78 |     "      break\n",
 79 |     "\n",
 80 |     "  print(w1, \"-\", w2, \"=\", best_word, \"-\", w3)\n",
 81 |     "\n",
 82 |     "\n",
 83 |     "def nearest_neighbors(w, n=5):\n",
 84 |     "  if w not in word2vec:\n",
 85 |     "    print(\"%s not in dictionary:\" % w)\n",
 86 |     "    return\n",
 87 |     "\n",
 88 |     "  v = word2vec[w]\n",
 89 |     "  distances = pairwise_distances(v.reshape(1, D), embedding, metric=metric).reshape(V)\n",
 90 |     "  idxs = distances.argsort()[1:n+1]\n",
 91 |     "  print(\"neighbors of: %s\" % w)\n",
 92 |     "  for idx in idxs:\n",
 93 |     "    print(\"\\t%s\" % idx2word[idx])\n",
 94 |     "\n",
 95 |     "\n",
 96 |     "\n",
 97 |     "# load in pre-trained word vectors\n",
 98 |     "print('Loading word vectors...')\n",
 99 |     "word2vec = {}\n",
100 |     "embedding = []\n",
101 |     "idx2word = []\n",
102 |     "with open('glove/glove.6B.50d.txt', encoding='utf-8') as f:\n",
103 |     "  # is just a space-separated text file in the format:\n",
104 |     "  # word vec[0] vec[1] vec[2] ...\n",
105 |     "  for line in f:\n",
106 |     "    values = line.split()\n",
107 |     "    word = values[0]\n",
108 |     "    vec = np.asarray(values[1:], dtype='float32')\n",
109 |     "    word2vec[word] = vec\n",
110 |     "    embedding.append(vec)\n",
111 |     "    idx2word.append(word)\n",
112 |     "print('Found %s word vectors.' % len(word2vec))\n",
113 |     "embedding = np.array(embedding)\n",
114 |     "V, D = embedding.shape\n",
115 |     "\n",
116 |     "\n",
117 |     "find_analogies('king', 'man', 'woman')\n",
118 |     "find_analogies('france', 'paris', 'london')\n",
119 |     "find_analogies('france', 'paris', 'rome')\n",
120 |     "find_analogies('paris', 'france', 'italy')\n",
121 |     "find_analogies('france', 'french', 'english')\n",
122 |     "find_analogies('japan', 'japanese', 'chinese')\n",
123 |     "find_analogies('japan', 'japanese', 'italian')\n",
124 |     "find_analogies('japan', 'japanese', 'australian')\n",
125 |     "find_analogies('december', 'november', 'june')\n",
126 |     "find_analogies('miami', 'florida', 'texas')\n",
127 |     "find_analogies('einstein', 'scientist', 'painter')\n",
128 |     "find_analogies('china', 'rice', 'bread')\n",
129 |     "find_analogies('man', 'woman', 'she')\n",
130 |     "find_analogies('man', 'woman', 'aunt')\n",
131 |     "find_analogies('man', 'woman', 'sister')\n",
132 |     "find_analogies('man', 'woman', 'wife')\n",
133 |     "find_analogies('man', 'woman', 'actress')\n",
134 |     "find_analogies('man', 'woman', 'mother')\n",
135 |     "find_analogies('heir', 'heiress', 'princess')\n",
136 |     "find_analogies('nephew', 'niece', 'aunt')\n",
137 |     "find_analogies('france', 'paris', 'tokyo')\n",
138 |     "find_analogies('france', 'paris', 'beijing')\n",
139 |     "find_analogies('february', 'january', 'november')\n",
140 |     "find_analogies('france', 'paris', 'rome')\n",
141 |     "find_analogies('paris', 'france', 'italy')"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 3,
147 |    "metadata": {},
148 |    "outputs": [
149 |     {
150 |      "name": "stdout",
151 |      "output_type": "stream",
152 |      "text": [
153 |       "neighbors of: king\n",
154 |       "\tprince\n",
155 |       "\tqueen\n",
156 |       "\tii\n",
157 |       "\temperor\n",
158 |       "\tson\n",
159 |       "neighbors of: france\n",
160 |       "\tfrench\n",
161 |       "\tbelgium\n",
162 |       "\tparis\n",
163 |       "\tspain\n",
164 |       "\tnetherlands\n",
165 |       "neighbors of: japan\n",
166 |       "\tjapanese\n",
167 |       "\tchina\n",
168 |       "\tkorea\n",
169 |       "\ttokyo\n",
170 |       "\ttaiwan\n",
171 |       "neighbors of: einstein\n",
172 |       "\trelativity\n",
173 |       "\tbohr\n",
174 |       "\tphysics\n",
175 |       "\theisenberg\n",
176 |       "\tfreud\n",
177 |       "neighbors of: woman\n",
178 |       "\tgirl\n",
179 |       "\tman\n",
180 |       "\tmother\n",
181 |       "\ther\n",
182 |       "\tboy\n",
183 |       "neighbors of: nephew\n",
184 |       "\tcousin\n",
185 |       "\tbrother\n",
186 |       "\tgrandson\n",
187 |       "\tson\n",
188 |       "\tuncle\n",
189 |       "neighbors of: february\n",
190 |       "\toctober\n",
191 |       "\tdecember\n",
192 |       "\tjanuary\n",
193 |       "\taugust\n",
194 |       "\tseptember\n",
195 |       "neighbors of: rome\n",
196 |       "\tnaples\n",
197 |       "\tvenice\n",
198 |       "\titaly\n",
199 |       "\tturin\n",
200 |       "\tpope\n"
201 |      ]
202 |     }
203 |    ],
204 |    "source": [
205 |     "nearest_neighbors('king')\n",
206 |     "nearest_neighbors('france')\n",
207 |     "nearest_neighbors('japan')\n",
208 |     "nearest_neighbors('einstein')\n",
209 |     "nearest_neighbors('woman')\n",
210 |     "nearest_neighbors('nephew')\n",
211 |     "nearest_neighbors('february')\n",
212 |     "nearest_neighbors('rome')"
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": null,
218 |    "metadata": {},
219 |    "outputs": [],
220 |    "source": []
221 |   }
222 |  ],
223 |  "metadata": {
224 |   "kernelspec": {
225 |    "display_name": "Python 3",
226 |    "language": "python",
227 |    "name": "python3"
228 |   },
229 |   "language_info": {
230 |    "codemirror_mode": {
231 |     "name": "ipython",
232 |     "version": 3
233 |    },
234 |    "file_extension": ".py",
235 |    "mimetype": "text/x-python",
236 |    "name": "python",
237 |    "nbconvert_exporter": "python",
238 |    "pygments_lexer": "ipython3",
239 |    "version": "3.6.7"
240 |   }
241 |  },
242 |  "nbformat": 4,
243 |  "nbformat_minor": 2
244 | }
245 | 


--------------------------------------------------------------------------------
/Pre-trained word-vectors from gensim-data.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 16,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import gensim.downloader as api\n",
 10 |     "\n",
 11 |     "word_vectors = api.load(\"glove-wiki-gigaword-100\")"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 17,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "def find_analogies(w1, w2, w3):\n",
 21 |     "  r = word_vectors.most_similar(positive=[w1, w3], negative=[w2])\n",
 22 |     "  print(\"%s - %s = %s - %s\" % (w1, w2, r[0][0], w3))"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 18,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "def nearest_neighbors(w):\n",
 32 |     "  r = word_vectors.most_similar(positive=[w])\n",
 33 |     "  print(\"neighbors of: %s\" % w)\n",
 34 |     "  for word, score in r:\n",
 35 |     "    print(\"\\t%s\" % word)"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 19,
 41 |    "metadata": {},
 42 |    "outputs": [
 43 |     {
 44 |      "name": "stdout",
 45 |      "output_type": "stream",
 46 |      "text": [
 47 |       "king - man = queen - woman\n",
 48 |       "france - paris = britain - london\n",
 49 |       "france - paris = italy - rome\n",
 50 |       "paris - france = rome - italy\n",
 51 |       "france - french = england - english\n",
 52 |       "japan - japanese = china - chinese\n",
 53 |       "japan - japanese = italy - italian\n",
 54 |       "japan - japanese = australia - australian\n",
 55 |       "december - november = july - june\n",
 56 |       "miami - florida = dallas - texas\n",
 57 |       "einstein - scientist = picasso - painter\n",
 58 |       "china - rice = chinese - bread\n",
 59 |       "man - woman = he - she\n",
 60 |       "man - woman = uncle - aunt\n",
 61 |       "man - woman = brother - sister\n",
 62 |       "man - woman = brother - wife\n",
 63 |       "man - woman = actor - actress\n",
 64 |       "man - woman = father - mother\n",
 65 |       "heir - heiress = prince - princess\n",
 66 |       "nephew - niece = uncle - aunt\n",
 67 |       "france - paris = japan - tokyo\n",
 68 |       "france - paris = china - beijing\n",
 69 |       "february - january = october - november\n",
 70 |       "france - paris = italy - rome\n",
 71 |       "paris - france = rome - italy\n"
 72 |      ]
 73 |     }
 74 |    ],
 75 |    "source": [
 76 |     "find_analogies('king', 'man', 'woman')\n",
 77 |     "find_analogies('france', 'paris', 'london')\n",
 78 |     "find_analogies('france', 'paris', 'rome')\n",
 79 |     "find_analogies('paris', 'france', 'italy')\n",
 80 |     "find_analogies('france', 'french', 'english')\n",
 81 |     "find_analogies('japan', 'japanese', 'chinese')\n",
 82 |     "find_analogies('japan', 'japanese', 'italian')\n",
 83 |     "find_analogies('japan', 'japanese', 'australian')\n",
 84 |     "find_analogies('december', 'november', 'june')\n",
 85 |     "find_analogies('miami', 'florida', 'texas')\n",
 86 |     "find_analogies('einstein', 'scientist', 'painter')\n",
 87 |     "find_analogies('china', 'rice', 'bread')\n",
 88 |     "find_analogies('man', 'woman', 'she')\n",
 89 |     "find_analogies('man', 'woman', 'aunt')\n",
 90 |     "find_analogies('man', 'woman', 'sister')\n",
 91 |     "find_analogies('man', 'woman', 'wife')\n",
 92 |     "find_analogies('man', 'woman', 'actress')\n",
 93 |     "find_analogies('man', 'woman', 'mother')\n",
 94 |     "find_analogies('heir', 'heiress', 'princess')\n",
 95 |     "find_analogies('nephew', 'niece', 'aunt')\n",
 96 |     "find_analogies('france', 'paris', 'tokyo')\n",
 97 |     "find_analogies('france', 'paris', 'beijing')\n",
 98 |     "find_analogies('february', 'january', 'november')\n",
 99 |     "find_analogies('france', 'paris', 'rome')\n",
100 |     "find_analogies('paris', 'france', 'italy')"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 20,
106 |    "metadata": {},
107 |    "outputs": [
108 |     {
109 |      "name": "stdout",
110 |      "output_type": "stream",
111 |      "text": [
112 |       "neighbors of: king\n",
113 |       "\tprince\n",
114 |       "\tqueen\n",
115 |       "\tson\n",
116 |       "\tbrother\n",
117 |       "\tmonarch\n",
118 |       "\tthrone\n",
119 |       "\tkingdom\n",
120 |       "\tfather\n",
121 |       "\temperor\n",
122 |       "\tii\n",
123 |       "neighbors of: france\n",
124 |       "\tbelgium\n",
125 |       "\tfrench\n",
126 |       "\tbritain\n",
127 |       "\tspain\n",
128 |       "\tparis\n",
129 |       "\tgermany\n",
130 |       "\titaly\n",
131 |       "\teurope\n",
132 |       "\tnetherlands\n",
133 |       "\tluxembourg\n",
134 |       "neighbors of: japan\n",
135 |       "\tjapanese\n",
136 |       "\tkorea\n",
137 |       "\ttokyo\n",
138 |       "\tchina\n",
139 |       "\tindonesia\n",
140 |       "\trussia\n",
141 |       "\tasia\n",
142 |       "\tthailand\n",
143 |       "\ttaiwan\n",
144 |       "\tgermany\n",
145 |       "neighbors of: einstein\n",
146 |       "\trelativity\n",
147 |       "\tfreud\n",
148 |       "\tphysics\n",
149 |       "\tbohr\n",
150 |       "\ttheory\n",
151 |       "\tquantum\n",
152 |       "\tsigmund\n",
153 |       "\ttheories\n",
154 |       "\tcosmological\n",
155 |       "\tequations\n",
156 |       "neighbors of: woman\n",
157 |       "\tgirl\n",
158 |       "\tman\n",
159 |       "\tmother\n",
160 |       "\tboy\n",
161 |       "\tshe\n",
162 |       "\tchild\n",
163 |       "\twife\n",
164 |       "\ther\n",
165 |       "\therself\n",
166 |       "\tdaughter\n",
167 |       "neighbors of: nephew\n",
168 |       "\tcousin\n",
169 |       "\tgrandson\n",
170 |       "\tbrother\n",
171 |       "\tson\n",
172 |       "\tuncle\n",
173 |       "\teldest\n",
174 |       "\tbrother-in-law\n",
175 |       "\tniece\n",
176 |       "\tfather\n",
177 |       "\tgrandfather\n",
178 |       "neighbors of: february\n",
179 |       "\toctober\n",
180 |       "\tdecember\n",
181 |       "\tseptember\n",
182 |       "\tjanuary\n",
183 |       "\taugust\n",
184 |       "\tnovember\n",
185 |       "\tjune\n",
186 |       "\tapril\n",
187 |       "\tjuly\n",
188 |       "\tmarch\n",
189 |       "neighbors of: rome\n",
190 |       "\tnaples\n",
191 |       "\tparis\n",
192 |       "\tvenice\n",
193 |       "\troman\n",
194 |       "\tvienna\n",
195 |       "\titaly\n",
196 |       "\tturin\n",
197 |       "\tmilan\n",
198 |       "\tpope\n",
199 |       "\tvatican\n"
200 |      ]
201 |     }
202 |    ],
203 |    "source": [
204 |     "nearest_neighbors('king')\n",
205 |     "nearest_neighbors('france')\n",
206 |     "nearest_neighbors('japan')\n",
207 |     "nearest_neighbors('einstein')\n",
208 |     "nearest_neighbors('woman')\n",
209 |     "nearest_neighbors('nephew')\n",
210 |     "nearest_neighbors('february')\n",
211 |     "nearest_neighbors('rome')"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": null,
217 |    "metadata": {},
218 |    "outputs": [],
219 |    "source": []
220 |   }
221 |  ],
222 |  "metadata": {
223 |   "kernelspec": {
224 |    "display_name": "Python 3",
225 |    "language": "python",
226 |    "name": "python3"
227 |   },
228 |   "language_info": {
229 |    "codemirror_mode": {
230 |     "name": "ipython",
231 |     "version": 3
232 |    },
233 |    "file_extension": ".py",
234 |    "mimetype": "text/x-python",
235 |    "name": "python",
236 |    "nbconvert_exporter": "python",
237 |    "pygments_lexer": "ipython3",
238 |    "version": "3.6.7"
239 |   }
240 |  },
241 |  "nbformat": 4,
242 |  "nbformat_minor": 2
243 | }
244 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Natural-Language-Processing-with-Deep-Learning-in-Python-
2 | The repository for the course in Udemy
3 | 


--------------------------------------------------------------------------------
/data/readme:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------