├── .gitignore ├── LICENSE ├── README.md ├── clustering ├── K-nearest-neighbors │ ├── knn-iris.ipynb │ └── knn-mushroom.ipynb ├── dbcan │ └── dbscan-iris.ipynb └── k-mean │ ├── customer.ipynb │ ├── pp-centroids.gif │ └── random-centroids.gif ├── decomposition ├── latent-dirichlet-allocation │ ├── similarity.ipynb │ ├── summary.ipynb │ └── topic-modelling.ipynb ├── latent-semantic-analysis │ ├── similarity.ipynb │ ├── summarize.ipynb │ └── topic-modelling.ipynb ├── linear-decomposition-analysis │ └── iris.ipynb ├── nonnegative-matrix │ ├── similarity.ipynb │ ├── summary.ipynb │ └── topic-modelling.ipynb ├── principal-component-analysis │ ├── iris.ipynb │ └── mushroom.ipynb └── tsne │ ├── animation-tsne-iris.gif │ ├── animation-tsne-perplexity-iris.gif │ ├── iris-adaptive-momentum.ipynb │ └── iris.ipynb ├── gradient-visualization ├── gradient-MAE.ipynb ├── gradient-MSE.ipynb ├── gradient-RMSE.ipynb ├── mae_plot.py ├── mse_plot.py └── rmse_plot.py ├── monte-carlo ├── TSLA.csv ├── markov-chain │ ├── metropolis-hasting-normal.ipynb │ └── metropolis-hasting-stock-forecasting.ipynb ├── monte-carlo-pi.ipynb └── stock-prediction.ipynb ├── neural-network ├── batch-normalization │ ├── animation-batch-normalization-iris.gif │ └── softmax-entropy-gradientdescent.ipynb ├── convolutional │ ├── README.md │ ├── atrous1d-loop │ │ └── atrous-1d.ipynb │ ├── atrous2d-loop │ │ └── atrous-2d.ipynb │ ├── atrous2d │ │ └── softmax-cross-entropy-gradient-descent.ipynb │ ├── audio.zip │ ├── avgpooling1d │ │ └── avgpooling1d.ipynb │ ├── avgpooling2d │ │ └── avgpooling2d.ipynb │ ├── conv1d-loop │ │ └── softmax-cross-entropy-gradient-descent.ipynb │ ├── conv1d │ │ └── softmax-cross-entropy-gradient-descent.ipynb │ ├── conv2d-loop │ │ └── softmax-cross-entropy-gradient-descent.ipynb │ ├── conv2d │ │ └── softmax-cross-entropy-gradient-descent.ipynb │ ├── maxpooling1d │ │ └── maxpooling1d.ipynb │ └── maxpooling2d │ │ └── maxpooling2d.ipynb ├── dropout │ ├── alpha-dropout │ │ └── softmax-entropy-gradientdescent.ipynb │ └── dropout │ │ ├── animation-dropout-iris.gif │ │ └── softmax-entropy-gradientdescent.ipynb ├── evolution-strategy │ ├── Deep-Evolution-Strategies-Iris.ipynb │ ├── evolution-iris-decision.ipynb │ ├── evolution_strategy.py │ └── function.py ├── feed-forward │ ├── softmax-entropy-adagrad │ │ └── Softmax-Entropy-Adagrad-GradientDescent.ipynb │ ├── softmax-entropy-adam │ │ └── Softmax-Entropy-Adam-GradientDescent.ipynb │ ├── softmax-entropy-gradientdescent │ │ ├── Softmax-Entropy-GradientDescent.ipynb │ │ └── function.py │ ├── softmax-entropy-momentum │ │ ├── Softmax-Entropy-Momentum-GradientDescent.ipynb │ │ └── function.py │ ├── softmax-entropy-nesterov │ │ └── Softmax-Entropy-Momentum-Nesterov.ipynb │ └── softmax-entropy-rmsprop │ │ └── Softmax-Entropy-RMSprop-GradientDescent.ipynb ├── gated-recurrent-unit-RNN │ ├── README.md │ ├── adagrad │ │ └── gru-rnn-adagrad.ipynb │ ├── adam │ │ └── gru-rnn-adam.ipynb │ ├── consumer.h │ ├── gradient-descent │ │ └── gru-rnn-gradient-descent.ipynb │ ├── momentum │ │ └── gru-rnn-momentum.ipynb │ ├── nesterov │ │ └── gru-rnn-nesterov.ipynb │ └── rmsprop │ │ └── gru-rnn-rmsprop.ipynb ├── long-short-term-RNN │ ├── README.md │ ├── adagrad │ │ └── lstm-rnn-adagrad.ipynb │ ├── adam │ │ └── lstm-rnn-adam.ipynb │ ├── consumer.h │ ├── gradient-descent │ │ └── lstm-rnn-gradient-descent.ipynb │ ├── momentum │ │ └── lstm-rnn-momentum.ipynb │ ├── nesterov │ │ └── lstm-rnn-nesterov.ipynb │ └── rmsprop │ │ └── lstm-rnn-rmsprop.ipynb ├── neuro-evolution │ ├── neuro-evolution-iris.ipynb │ 
├── neuro-evolution-novelty-search-iris.ipynb │ └── neuro-evolution-regression.ipynb ├── regularizarion │ ├── animation-l1-iris.gif │ ├── animation-l1l2-iris.gif │ ├── animation-l2-iris.gif │ ├── softmax-entropy-gradientdescent-l1.ipynb │ ├── softmax-entropy-gradientdescent-l1l2.ipynb │ └── softmax-entropy-gradientdescent-l2.ipynb └── vanilla-RNN │ ├── README.md │ ├── adagrad │ └── vanilla-rnn-adagrad.ipynb │ ├── adam │ └── vanilla-rnn-adam.ipynb │ ├── consumer.h │ ├── gradient-descent │ └── vanilla-rnn-gradient-descent.ipynb │ ├── momentum │ └── vanilla-rnn-momentum.ipynb │ ├── nesterov │ └── vanilla-rnn-nesterov.ipynb │ └── rmsprop │ └── vanilla-rnn-rmsprop.ipynb ├── probabilistic ├── bayes-tfidf │ ├── gaussian-tfidf.ipynb │ ├── local │ │ ├── kerajaan │ │ │ └── kerajaan │ │ └── pembangkang │ │ │ └── pembangkang │ └── multinomial-tfidf.ipynb ├── hidden-markov │ ├── markov-text.ipynb │ ├── speeches.txt │ └── trump.py └── neural-network │ └── probabilistic-neural-network.ipynb ├── regression ├── animation-elasticnet-regression.gif ├── animation-lasso-regression.gif ├── animation-linear-regression.gif ├── animation-poly-k-regression.gif ├── animation-ridge-regression.gif ├── elasticnet regression.ipynb ├── lasso regression.ipynb ├── linear regression.ipynb ├── polynomial-regression.ipynb ├── regularization.py ├── ridge regression.ipynb └── sigmoid regression.ipynb ├── results ├── animation-adagrad-gradientdescent-iris.gif ├── animation-adam-gradientdescent-iris.gif ├── animation-evolution-iris.gif ├── animation-gradientdescent-iris.gif ├── animation-momentum-gradientdescent-iris.gif ├── animation-nesterov-gradientdescent-iris.gif ├── animation-rmsprop-gradientdescent-iris.gif ├── gradient-descent.png ├── gradient-evolution.png └── mse-gradient.png ├── signal-processing ├── conv-1d.ipynb ├── conv-2d.ipynb └── pass-filters.ipynb ├── timeseries ├── ARIMA.ipynb ├── TSLA.csv ├── anchor-smooth.ipynb ├── detect-outliers.ipynb ├── john-ehlers.ipynb ├── linear-weight-moving-average.ipynb ├── moving-average.ipynb ├── noise-removal-get.ipynb └── seasonal-decomposition.ipynb └── trees-based ├── adaboost └── classification-iris-adaboost.ipynb ├── bagging ├── bagging-classification.ipynb └── bagging-regression.ipynb ├── decision-tree ├── classification-entropy.ipynb └── regression-tree.ipynb ├── gradient-boosting ├── gradient-boosting-classification.ipynb └── gradient-boosting-regression.ipynb └── random-forest ├── random-forest-classification.ipynb └── random-forest-regression.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 HUSEIN ZOLKEPLI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Machine-Learning-Numpy
2 | 
3 | Machine learning models coded without any frameworks, using NumPy only.
4 | 
5 | 
6 | 
7 | ## Table of contents
8 | * [Neural Network](https://github.com/huseinzol05/Machine-Learning-Numpy#neural-network)
9 | * [Clustering](https://github.com/huseinzol05/Machine-Learning-Numpy#clustering)
10 | * [Decomposition](https://github.com/huseinzol05/Machine-Learning-Numpy#decomposition)
11 | * [Probabilistic](https://github.com/huseinzol05/Machine-Learning-Numpy#probabilistic)
12 | * [Regression](https://github.com/huseinzol05/Machine-Learning-Numpy#regression)
13 | * [Trees based](https://github.com/huseinzol05/Machine-Learning-Numpy#trees-based)
14 | * [Timeseries](https://github.com/huseinzol05/Machine-Learning-Numpy#timeseries)
15 | * [Signal processing](https://github.com/huseinzol05/Machine-Learning-Numpy#signal-processing)
16 | * [Monte-carlo](https://github.com/huseinzol05/Machine-Learning-Numpy#monte-carlo)
17 | 
18 | ### Neural Network
19 | 
20 | 1. Deep Feed-forward
21 | * gradient descent
22 | * momentum
23 | * nesterov
24 | * rmsprop
25 | * adagrad
26 | * adam
27 | 
28 | 2. Vanilla recurrent
29 | * gradient descent
30 | * momentum
31 | * nesterov
32 | * rmsprop
33 | * adagrad
34 | * adam
35 | 
36 | 3. Long-short-term-memory recurrent
37 | * gradient descent
38 | * momentum
39 | * nesterov
40 | * rmsprop
41 | * adagrad
42 | * adam
43 | 
44 | 4. Gated-recurrent-unit recurrent
45 | * gradient descent
46 | * momentum
47 | * nesterov
48 | * rmsprop
49 | * adagrad
50 | * adam
51 | 
52 | 5. Convolutional
53 | * atrous 1D
54 | * atrous 2D
55 | * average pooling 1D
56 | * average pooling 2D
57 | * convolution 1D
58 | * convolution 2D
59 | * max pooling 1D
60 | * max pooling 2D
61 | 
62 | 6. Batch-normalization
63 | 7. Dropout
64 | 8. Regularization
65 | 9. Neuro-evolution
66 | * Iris classification
67 | * Iris classification + Novelty search
68 | * Regression
69 | 
70 | 10. Evolution-strategy
71 | 
72 | ### Clustering
73 | 
74 | 1. DBSCAN
75 | 2. K-Means
76 | 3. K-Nearest Neighbors
77 | 
78 | ### Decomposition
79 | 
80 | 1. Latent Dirichlet Allocation
81 | 2. Latent Semantic Analysis
82 | 3. Linear Decomposition Analysis
83 | 4. Non-negative Matrix Factorization
84 | 5. Principal Component Analysis
85 | 6. t-SNE
86 | 
87 | ### Probabilistic
88 | 
89 | 1. Gaussian TF-IDF
90 | 2. Multinomial TF-IDF
91 | 3. Hidden Markov
92 | 4. Neural Network
93 | 
94 | ### Regression
95 | 
96 | 1. Linear
97 | 2. Polynomial
98 | 3. Lasso
99 | 4. Ridge
100 | 5. Sigmoid logistic
101 | 
102 | ### Trees based
103 | 
104 | 1. Decision Tree
105 | 2. Random Forest
106 | 3. Adaptive Boosting
107 | 4. Bagging
108 | 5. Gradient Boosting
109 | 
110 | ### Timeseries
111 | 
112 | 1. Moving Average
113 | 2. Linear Weighted Moving Average
114 | 3. John-Ehlers
115 | 4. Noise Removal-Get
116 | 5. Anchor Smoothing
117 | 6. Detect Outliers
118 | 7. ARIMA
119 | 8. 
Seasonal Decomposition 120 | 121 | ### Signal processing 122 | 123 | 1. Convolutional 1D 124 | 2. Convolutional 2D 125 | 3. Pass-Filters 126 | 127 | ### Monte-carlo 128 | 129 | 1. Markov Chain 130 | * metropolis hasting normal distribution 131 | * metropolis hasting stock forecasting 132 | 2. Pi estimation 133 | 3. Stock market prediction 134 | 135 | ## Discussions 136 | 137 | Some of results are not good because of softmax and cross entropy functions I code. 138 | 139 | If found any error on my chain-rules, feel free to branch. 140 | -------------------------------------------------------------------------------- /clustering/k-mean/pp-centroids.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/clustering/k-mean/pp-centroids.gif -------------------------------------------------------------------------------- /clustering/k-mean/random-centroids.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/clustering/k-mean/random-centroids.gif -------------------------------------------------------------------------------- /decomposition/latent-dirichlet-allocation/similarity.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import scipy as sp\n", 11 | "from scipy.special import gammaln\n", 12 | "import re, random" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "with open('kerajaan','r') as fopen:\n", 22 | " kerajaan = list(filter(None, fopen.read().split('\\n')))" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "def clearstring(string):\n", 32 | " string = re.sub('[^A-Za-z0-9 ]+', '', string)\n", 33 | " string = string.split(' ')\n", 34 | " string = filter(None, string)\n", 35 | " string = [y.strip() for y in string]\n", 36 | " string = ' '.join(string)\n", 37 | " return string.lower()\n", 38 | "\n", 39 | "kerajaan = [clearstring(i) for i in kerajaan]" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "def sample_index(p):\n", 49 | " return np.random.multinomial(1,p).argmax()\n", 50 | "\n", 51 | "def word_indices(vec):\n", 52 | " for idx in vec.nonzero()[0]:\n", 53 | " for i in range(int(vec[idx])):\n", 54 | " yield idx\n", 55 | "\n", 56 | "def log_multi_beta(alpha, K=None):\n", 57 | " if K is None:\n", 58 | " return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha))\n", 59 | " else:\n", 60 | " return K * gammaln(alpha) - gammaln(K*alpha)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 5, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "class LDA:\n", 70 | " def __init__(self, corpus, n_topics, iteration=30, alpha=0.1, beta=0.1):\n", 71 | " self.corpus = corpus\n", 72 | " self.vocabulary = list(set(' '.join(self.corpus).split()))\n", 73 | " self.iteration = iteration\n", 74 | " self.alpha = alpha\n", 75 | " self.beta = beta\n", 76 | " self.n_topics = n_topics\n", 77 | " self._bow()\n", 78 | " n_docs, vocab_size = 
self.tfidf.shape\n", 79 | " self.nmz = np.zeros((n_docs, n_topics))\n", 80 | " self.nzw = np.zeros((n_topics, vocab_size))\n", 81 | " self.nm = np.zeros(n_docs)\n", 82 | " self.nz = np.zeros(self.n_topics)\n", 83 | " self.topics = {}\n", 84 | " \n", 85 | " for m in range(n_docs):\n", 86 | " for i, w in enumerate(word_indices(self.tfidf[m, :])):\n", 87 | " z = np.random.randint(n_topics)\n", 88 | " self.nmz[m,z] += 1\n", 89 | " self.nm[m] += 1\n", 90 | " self.nzw[z,w] += 1\n", 91 | " self.nz[z] += 1\n", 92 | " self.topics[(m,i)] = z\n", 93 | " \n", 94 | " def _bow(self):\n", 95 | " self.tfidf = np.zeros((len(self.corpus),len(self.vocabulary)))\n", 96 | " for no, i in enumerate(self.corpus):\n", 97 | " for text in i.split():\n", 98 | " self.tfidf[no, self.vocabulary.index(text)] += 1\n", 99 | " \n", 100 | " def _conditional_distribution(self, m, w):\n", 101 | " vocab_size = self.nzw.shape[1]\n", 102 | " left = (self.nzw[:,w] + self.beta) / (self.nz + self.beta * vocab_size)\n", 103 | " right = (self.nmz[m,:] + self.alpha) / (self.nm[m] + self.alpha * self.n_topics)\n", 104 | " p_z = left * right\n", 105 | " p_z /= np.sum(p_z)\n", 106 | " return p_z\n", 107 | " \n", 108 | " def loglikelihood(self):\n", 109 | " vocab_size = self.nzw.shape[1]\n", 110 | " n_docs = self.nmz.shape[0]\n", 111 | " lik = 0\n", 112 | " for z in range(self.n_topics):\n", 113 | " lik += log_multi_beta(self.nzw[z,:]+self.beta)\n", 114 | " lik -= log_multi_beta(self.beta, vocab_size)\n", 115 | " for m in range(n_docs):\n", 116 | " lik += log_multi_beta(self.nmz[m,:]+self.alpha)\n", 117 | " lik -= log_multi_beta(self.alpha, self.n_topics)\n", 118 | " return lik\n", 119 | " \n", 120 | " def run(self):\n", 121 | " for it in range(self.iteration):\n", 122 | " for m in range(self.tfidf.shape[0]):\n", 123 | " for i, w in enumerate(word_indices(self.tfidf[m, :])):\n", 124 | " z = self.topics[(m,i)]\n", 125 | " self.nmz[m,z] -= 1\n", 126 | " self.nm[m] -= 1\n", 127 | " self.nzw[z,w] -= 1\n", 128 | " self.nz[z] -= 1\n", 129 | " p_z = self._conditional_distribution(m, w)\n", 130 | " z = sample_index(p_z)\n", 131 | " self.nmz[m,z] += 1\n", 132 | " self.nm[m] += 1\n", 133 | " self.nzw[z,w] += 1\n", 134 | " self.nz[z] += 1\n", 135 | " self.topics[(m,i)] = z\n", 136 | " " 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 6, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "def find_sentences(keyword, corpus):\n", 146 | " d = []\n", 147 | " for content in [i for i in corpus if i.find(keyword)>=0]:\n", 148 | " a = content.split()\n", 149 | " d.append(a)\n", 150 | " return ' '.join([j for i in d for j in i if re.match(\"^[a-zA-Z_-]*$\", j) and len(j) > 1])" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 7, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "def compare(string1, string2, corpus, use_tfidf=True, epoch=50, learning_rate=1e-6, lam=1e3, penalty=1e-6):\n", 160 | " queries = [find_sentences(string1, corpus), find_sentences(string2, corpus)]\n", 161 | " lda = LDA(queries,2)\n", 162 | " lda.run()\n", 163 | " a=lda.nmz.dot(lda.nzw)\n", 164 | " angles=np.arccos(np.dot(a[0,:],a[1:].T) / (np.linalg.norm(a[0,:],2)* np.linalg.norm(a[1:],2)))\n", 165 | " return np.abs(1 - float(angles[0])/float(np.pi/2))" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 8, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "data": { 175 | "text/plain": [ 176 | "0.9991450069748095" 177 | ] 178 | }, 179 | "execution_count": 8, 180 | 
"metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "compare('kedah', 'kedah', kerajaan)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 9, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "0.34125433347880385" 197 | ] 198 | }, 199 | "execution_count": 9, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "compare('kedah', 'dap', kerajaan)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 10, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "0.3489930111865034" 217 | ] 218 | }, 219 | "execution_count": 10, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "compare('kedah', 'bn', kerajaan)" 226 | ] 227 | } 228 | ], 229 | "metadata": { 230 | "kernelspec": { 231 | "display_name": "Python 3", 232 | "language": "python", 233 | "name": "python3" 234 | }, 235 | "language_info": { 236 | "codemirror_mode": { 237 | "name": "ipython", 238 | "version": 3 239 | }, 240 | "file_extension": ".py", 241 | "mimetype": "text/x-python", 242 | "name": "python", 243 | "nbconvert_exporter": "python", 244 | "pygments_lexer": "ipython3", 245 | "version": "3.5.2" 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /decomposition/latent-dirichlet-allocation/topic-modelling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import scipy as sp\n", 11 | "from scipy.special import gammaln\n", 12 | "import re, random" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "def sample_index(p):\n", 22 | " return np.random.multinomial(1,p).argmax()\n", 23 | "\n", 24 | "def word_indices(vec):\n", 25 | " for idx in vec.nonzero()[0]:\n", 26 | " for i in range(int(vec[idx])):\n", 27 | " yield idx\n", 28 | "\n", 29 | "def log_multi_beta(alpha, K=None):\n", 30 | " if K is None:\n", 31 | " return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha))\n", 32 | " else:\n", 33 | " return K * gammaln(alpha) - gammaln(K*alpha)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "class LDA:\n", 43 | " def __init__(self, corpus, n_topics, iteration=30, alpha=0.1, beta=0.1):\n", 44 | " self.corpus = corpus\n", 45 | " self.vocabulary = list(set(' '.join(self.corpus).split()))\n", 46 | " self.iteration = iteration\n", 47 | " self.alpha = alpha\n", 48 | " self.beta = beta\n", 49 | " self.n_topics = n_topics\n", 50 | " self._bow()\n", 51 | " n_docs, vocab_size = self.tfidf.shape\n", 52 | " self.nmz = np.zeros((n_docs, n_topics))\n", 53 | " self.nzw = np.zeros((n_topics, vocab_size))\n", 54 | " self.nm = np.zeros(n_docs)\n", 55 | " self.nz = np.zeros(self.n_topics)\n", 56 | " self.topics = {}\n", 57 | " \n", 58 | " for m in range(n_docs):\n", 59 | " for i, w in enumerate(word_indices(self.tfidf[m, :])):\n", 60 | " z = np.random.randint(n_topics)\n", 61 | " self.nmz[m,z] += 1\n", 62 | " self.nm[m] += 1\n", 63 | " self.nzw[z,w] += 1\n", 64 | " self.nz[z] += 1\n", 65 | " self.topics[(m,i)] = z\n", 
66 | " \n", 67 | " def _bow(self):\n", 68 | " self.tfidf = np.zeros((len(self.corpus),len(self.vocabulary)))\n", 69 | " for no, i in enumerate(self.corpus):\n", 70 | " for text in i.split():\n", 71 | " self.tfidf[no, self.vocabulary.index(text)] += 1\n", 72 | " \n", 73 | " def _conditional_distribution(self, m, w):\n", 74 | " vocab_size = self.nzw.shape[1]\n", 75 | " left = (self.nzw[:,w] + self.beta) / (self.nz + self.beta * vocab_size)\n", 76 | " right = (self.nmz[m,:] + self.alpha) / (self.nm[m] + self.alpha * self.n_topics)\n", 77 | " p_z = left * right\n", 78 | " p_z /= np.sum(p_z)\n", 79 | " return p_z\n", 80 | " \n", 81 | " def loglikelihood(self):\n", 82 | " vocab_size = self.nzw.shape[1]\n", 83 | " n_docs = self.nmz.shape[0]\n", 84 | " lik = 0\n", 85 | " for z in range(self.n_topics):\n", 86 | " lik += log_multi_beta(self.nzw[z,:]+self.beta)\n", 87 | " lik -= log_multi_beta(self.beta, vocab_size)\n", 88 | " for m in range(n_docs):\n", 89 | " lik += log_multi_beta(self.nmz[m,:]+self.alpha)\n", 90 | " lik -= log_multi_beta(self.alpha, self.n_topics)\n", 91 | " return lik\n", 92 | " \n", 93 | " def run(self):\n", 94 | " for it in range(self.iteration):\n", 95 | " for m in range(self.tfidf.shape[0]):\n", 96 | " for i, w in enumerate(word_indices(self.tfidf[m, :])):\n", 97 | " z = self.topics[(m,i)]\n", 98 | " self.nmz[m,z] -= 1\n", 99 | " self.nm[m] -= 1\n", 100 | " self.nzw[z,w] -= 1\n", 101 | " self.nz[z] -= 1\n", 102 | " p_z = self._conditional_distribution(m, w)\n", 103 | " z = sample_index(p_z)\n", 104 | " self.nmz[m,z] += 1\n", 105 | " self.nm[m] += 1\n", 106 | " self.nzw[z,w] += 1\n", 107 | " self.nz[z] += 1\n", 108 | " self.topics[(m,i)] = z\n", 109 | " " 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 4, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "with open('kerajaan','r') as fopen:\n", 119 | " kerajaan = list(filter(None, fopen.read().split('\\n')))" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "def clearstring(string):\n", 129 | " string = re.sub('[^A-Za-z0-9 ]+', '', string)\n", 130 | " string = string.split(' ')\n", 131 | " string = filter(None, string)\n", 132 | " string = [y.strip() for y in string]\n", 133 | " string = ' '.join(string)\n", 134 | " return string.lower()\n", 135 | "\n", 136 | "kerajaan = [clearstring(i) for i in kerajaan]" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 6, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "def show_topics(corpus, count=10, k_words=10):\n", 146 | " lda = LDA(corpus,k_words)\n", 147 | " lda.run()\n", 148 | " vectors = lda.nmz[:count] \n", 149 | " top_words = lambda t: [lda.vocabulary[i] for i in np.argsort(t)[:-k_words-1:-1]]\n", 150 | " topic_words = ([top_words(t) for t in vectors])\n", 151 | " return [' '.join(t) for t in topic_words]" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 7, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/plain": [ 162 | "['buckle airbus bersepadu lawat kemalangn kongres 17rakaat ksemua menampung berliku',\n", 163 | " 'kemalangn buckle bersepadu lawat kongres 17rakaat ksemua menampung airbus berliku',\n", 164 | " 'kongres bersepadu lawat kemalangn 17rakaat buckle ksemua menampung airbus berliku',\n", 165 | " 'kongres bersepadu lawat kemalangn 17rakaat buckle ksemua menampung airbus berliku',\n", 166 | " 'menampung bersepadu lawat 
kemalangn kongres 17rakaat buckle ksemua airbus berliku',\n", 167 | " 'kemalangn buckle lawat berliku bersepadu kongres 17rakaat ksemua menampung airbus',\n", 168 | " 'menampung airbus bersepadu lawat kemalangn kongres 17rakaat buckle ksemua berliku',\n", 169 | " 'kongres bersepadu lawat kemalangn 17rakaat buckle ksemua menampung airbus berliku',\n", 170 | " 'ksemua kemalangn buckle airbus bersepadu lawat kongres 17rakaat menampung berliku',\n", 171 | " 'berliku buckle kemalangn bersepadu lawat kongres 17rakaat ksemua menampung airbus']" 172 | ] 173 | }, 174 | "execution_count": 7, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "show_topics(kerajaan)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [] 189 | } 190 | ], 191 | "metadata": { 192 | "kernelspec": { 193 | "display_name": "Python 3", 194 | "language": "python", 195 | "name": "python3" 196 | }, 197 | "language_info": { 198 | "codemirror_mode": { 199 | "name": "ipython", 200 | "version": 3 201 | }, 202 | "file_extension": ".py", 203 | "mimetype": "text/x-python", 204 | "name": "python", 205 | "nbconvert_exporter": "python", 206 | "pygments_lexer": "ipython3", 207 | "version": "3.5.2" 208 | } 209 | }, 210 | "nbformat": 4, 211 | "nbformat_minor": 2 212 | } 213 | -------------------------------------------------------------------------------- /decomposition/latent-semantic-analysis/similarity.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "from scipy.linalg import svd, inv\n", 11 | "import re, random" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "with open('kerajaan','r') as fopen:\n", 21 | " kerajaan = list(filter(None, fopen.read().split('\\n')))" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "def clearstring(string):\n", 31 | " string = re.sub('[^A-Za-z0-9 ]+', '', string)\n", 32 | " string = string.split(' ')\n", 33 | " string = filter(None, string)\n", 34 | " string = [y.strip() for y in string]\n", 35 | " string = ' '.join(string)\n", 36 | " return string.lower()\n", 37 | "\n", 38 | "kerajaan = [clearstring(i) for i in kerajaan]" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "class LSA:\n", 48 | " def __init__(self, corpus, tfidf=False):\n", 49 | " self.corpus = corpus\n", 50 | " self.vocabulary = list(set(' '.join(self.corpus).split()))\n", 51 | " if tfidf:\n", 52 | " self._tfidf()\n", 53 | " else:\n", 54 | " self._bow()\n", 55 | " self._calc_svd()\n", 56 | " \n", 57 | " def _calc_svd(self):\n", 58 | " self.U, self.S, self.Vt = svd(self.tfidf.T, full_matrices =False)\n", 59 | " \n", 60 | " def _bow(self):\n", 61 | " self.tfidf = np.zeros((len(self.corpus),len(self.vocabulary)))\n", 62 | " for no, i in enumerate(self.corpus):\n", 63 | " for text in i.split():\n", 64 | " self.tfidf[no, self.vocabulary.index(text)] += 1\n", 65 | " \n", 66 | " def _tfidf(self):\n", 67 | " idf = {}\n", 68 | " for i in self.vocabulary:\n", 69 | " idf[i] = 0\n", 70 | " for k in self.corpus:\n", 71 | " if i in k.split():\n", 72 | " 
idf[i] += 1\n", 73 | " idf[i] = np.log(idf[i] / len(self.corpus))\n", 74 | " self.tfidf = np.zeros((len(self.corpus),len(self.vocabulary)))\n", 75 | " for no, i in enumerate(self.corpus):\n", 76 | " for text in i.split():\n", 77 | " self.tfidf[no, self.vocabulary.index(text)] += 1\n", 78 | " for text in i.split():\n", 79 | " self.tfidf[no, self.vocabulary.index(text)] = self.tfidf[no, self.vocabulary.index(text)] * idf[text]" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "def find_sentences(keyword, corpus):\n", 89 | " d = []\n", 90 | " for content in [i for i in corpus if i.find(keyword)>=0]:\n", 91 | " a = content.split()\n", 92 | " d.append(a)\n", 93 | " return ' '.join([j for i in d for j in i if re.match(\"^[a-zA-Z_-]*$\", j) and len(j) > 1])\n", 94 | "\n", 95 | "def compare(string1, string2, corpus, tfidf=False):\n", 96 | " queries = [find_sentences(string1, corpus), find_sentences(string2, corpus)]\n", 97 | " lsa = LSA(queries,tfidf=tfidf)\n", 98 | " Vt = lsa.Vt\n", 99 | " S = np.diag(lsa.S)\n", 100 | " vectors =[(np.dot(S,Vt[:,0]), np.dot(S,Vt[:,i])) for i in range(len(Vt))]\n", 101 | " angles = [np.arccos(np.dot(a,b) / (np.linalg.norm(a,2)* np.linalg.norm(b,2))) for a,b in vectors[1:]]\n", 102 | " return np.abs(1 - float(angles[0])/float(np.pi/2))" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/plain": [ 113 | "1.0" 114 | ] 115 | }, 116 | "execution_count": 6, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "compare('kedah', 'kedah', kerajaan)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 7, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "0.18372139960335687" 134 | ] 135 | }, 136 | "execution_count": 7, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "compare('kedah', 'dap', kerajaan)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [] 151 | } 152 | ], 153 | "metadata": { 154 | "kernelspec": { 155 | "display_name": "Python 3", 156 | "language": "python", 157 | "name": "python3" 158 | }, 159 | "language_info": { 160 | "codemirror_mode": { 161 | "name": "ipython", 162 | "version": 3 163 | }, 164 | "file_extension": ".py", 165 | "mimetype": "text/x-python", 166 | "name": "python", 167 | "nbconvert_exporter": "python", 168 | "pygments_lexer": "ipython3", 169 | "version": "3.5.2" 170 | } 171 | }, 172 | "nbformat": 4, 173 | "nbformat_minor": 2 174 | } 175 | -------------------------------------------------------------------------------- /decomposition/latent-semantic-analysis/topic-modelling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "from scipy.linalg import svd, inv\n", 11 | "import re, random" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "with open('kerajaan','r') as fopen:\n", 21 | " kerajaan = list(filter(None, fopen.read().split('\\n')))" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 
27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "def clearstring(string):\n", 31 | " string = re.sub('[^A-Za-z0-9 ]+', '', string)\n", 32 | " string = string.split(' ')\n", 33 | " string = filter(None, string)\n", 34 | " string = [y.strip() for y in string]\n", 35 | " string = ' '.join(string)\n", 36 | " return string.lower()\n", 37 | "\n", 38 | "kerajaan = [clearstring(i) for i in kerajaan]" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "class LSA:\n", 48 | " def __init__(self, corpus, tfidf=False):\n", 49 | " self.corpus = corpus\n", 50 | " self.vocabulary = list(set(' '.join(self.corpus).split()))\n", 51 | " if tfidf:\n", 52 | " self._tfidf()\n", 53 | " else:\n", 54 | " self._bow()\n", 55 | " self._calc_svd()\n", 56 | " \n", 57 | " def _calc_svd(self):\n", 58 | " self.U, self.S, self.Vt = svd(self.tfidf.T, full_matrices =False)\n", 59 | " \n", 60 | " def _bow(self):\n", 61 | " self.tfidf = np.zeros((len(self.corpus),len(self.vocabulary)))\n", 62 | " for no, i in enumerate(self.corpus):\n", 63 | " for text in i.split():\n", 64 | " self.tfidf[no, self.vocabulary.index(text)] += 1\n", 65 | " \n", 66 | " def _tfidf(self):\n", 67 | " idf = {}\n", 68 | " for i in self.vocabulary:\n", 69 | " idf[i] = 0\n", 70 | " for k in self.corpus:\n", 71 | " if i in k.split():\n", 72 | " idf[i] += 1\n", 73 | " idf[i] = np.log(idf[i] / len(self.corpus))\n", 74 | " self.tfidf = np.zeros((len(self.corpus),len(self.vocabulary)))\n", 75 | " for no, i in enumerate(self.corpus):\n", 76 | " for text in i.split():\n", 77 | " self.tfidf[no, self.vocabulary.index(text)] += 1\n", 78 | " for text in i.split():\n", 79 | " self.tfidf[no, self.vocabulary.index(text)] = self.tfidf[no, self.vocabulary.index(text)] * idf[text]" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "def show_topics(corpus, count=10, k_words=10, tfidf=False):\n", 89 | " lsa = LSA(corpus,tfidf=tfidf)\n", 90 | " vectors = lsa.Vt[:count]\n", 91 | " top_words = lambda t: [lsa.vocabulary[i] for i in np.argsort(t)[:-k_words-1:-1]]\n", 92 | " topic_words = ([top_words(t) for t in vectors])\n", 93 | " return [' '.join(t) for t in topic_words]" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 6, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "['wanitaumnomsia saudarafitri sejam ydpa bndr doping isteri dijadikan berkisar menempatkan',\n", 105 | " 'wanitaumnomsia sejam ydpa bndr doping saudarafitri isteri berkisar dijadikan menempatkan',\n", 106 | " 'brgn mnjlnkn anthem jim hate underwood patutlah unprecedented elias seed',\n", 107 | " 'institut 39yo wohoo wajar mengiringi impact onehomeless gapena kedaulatan pipit',\n", 108 | " 'queen terhdp hantar salute consist kebrangkatan perbadanan elektrik pertikaian dayabumi',\n", 109 | " 'bombardierdauphin global car barangan belibelah modul armizanameer kjg rachel betterpeacekeeping',\n", 110 | " 'sekecil ketibaan penguasaan practices bomba bajet2017 duli menyerikan belasah sachs',\n", 111 | " 'terhdp munliv menyempurnakan dilarikan menerajuiperubahan pandan themthis pendekar fiskal berpendidikan',\n", 112 | " 'kritikal haizzz theirs pjm appreciation dahulu malaysiansunitedrun2017 diberikan seorng boarding',\n", 113 | " 'seringgit jomcny list cleaner sidek waterfront hamidi thorough bpmonline hassanrouhani']" 114 | ] 115 | }, 116 | "execution_count": 6, 117 
| "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "show_topics(kerajaan)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [] 131 | } 132 | ], 133 | "metadata": { 134 | "kernelspec": { 135 | "display_name": "Python 3", 136 | "language": "python", 137 | "name": "python3" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 3 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython3", 149 | "version": "3.5.2" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 2 154 | } 155 | -------------------------------------------------------------------------------- /decomposition/nonnegative-matrix/similarity.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import re, random\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "with open('kerajaan','r') as fopen:\n", 20 | " kerajaan = list(filter(None, fopen.read().split('\\n')))" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "def clearstring(string):\n", 30 | " string = re.sub('[^A-Za-z0-9 ]+', '', string)\n", 31 | " string = string.split(' ')\n", 32 | " string = filter(None, string)\n", 33 | " string = [y.strip() for y in string]\n", 34 | " string = ' '.join(string)\n", 35 | " return string.lower()\n", 36 | "\n", 37 | "kerajaan = [clearstring(i) for i in kerajaan]" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def penalty(M, mu):\n", 47 | " return np.where(M>=mu,0, np.min(M - mu, 0))\n", 48 | "\n", 49 | "def grads(M, W, H, lam, mu):\n", 50 | " R = W.dot(H) - M\n", 51 | " return R.dot(H.T) + penalty(W, mu)*lam, W.T.dot(R) + penalty(H, mu)*lam\n", 52 | "\n", 53 | "def upd(M, W, H, lr, lam, mu):\n", 54 | " dW,dH = grads(M,W,H,lam,mu)\n", 55 | " W -= lr*dW\n", 56 | " H -= lr*dH\n", 57 | " \n", 58 | "def tfidf(corpus):\n", 59 | " vocabulary = list(set(' '.join(corpus).split()))\n", 60 | " idf = {}\n", 61 | " for i in vocabulary:\n", 62 | " idf[i] = 0\n", 63 | " for k in corpus:\n", 64 | " if i in k.split():\n", 65 | " idf[i] += 1\n", 66 | " idf[i] = np.log(idf[i] / len(corpus))\n", 67 | " tfidf = np.zeros((len(corpus),len(vocabulary)))\n", 68 | " for no, i in enumerate(corpus):\n", 69 | " for text in i.split():\n", 70 | " tfidf[no, vocabulary.index(text)] += 1\n", 71 | " for text in i.split():\n", 72 | " tfidf[no, vocabulary.index(text)] = tfidf[no, vocabulary.index(text)] * idf[text]\n", 73 | " return vocabulary, tfidf\n", 74 | "\n", 75 | "def bow(corpus):\n", 76 | " vocabulary = list(set(' '.join(corpus).split()))\n", 77 | " bow = np.zeros((len(corpus),len(vocabulary)))\n", 78 | " for no, i in enumerate(corpus):\n", 79 | " for text in i.split():\n", 80 | " bow[no, vocabulary.index(text)] += 1\n", 81 | " return vocabulary, bow" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "def 
find_sentences(keyword, corpus):\n", 91 | " d = []\n", 92 | " for content in [i for i in corpus if i.find(keyword)>=0]:\n", 93 | " a = content.split()\n", 94 | " d.append(a)\n", 95 | " return ' '.join([j for i in d for j in i if re.match(\"^[a-zA-Z_-]*$\", j) and len(j) > 1])" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 15, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "def compare(string1, string2, corpus, use_tfidf=True, epoch=50, learning_rate=1e-6, lam=1e3, penalty=1e-6):\n", 105 | " queries = [find_sentences(string1, corpus), find_sentences(string2, corpus)]\n", 106 | " if use_tfidf:\n", 107 | " vocab, vectors = tfidf(queries)\n", 108 | " else:\n", 109 | " vocab, vectors = bow(queries)\n", 110 | " m, n = vectors.shape\n", 111 | " W = np.abs(np.random.normal(scale=0.01, size=(m,2)))\n", 112 | " H = np.abs(np.random.normal(scale=0.01, size=(2,n)))\n", 113 | " for i in range(epoch):\n", 114 | " upd(vectors,W,H,learning_rate,lam,penalty)\n", 115 | " a=W.dot(H)\n", 116 | " angles=np.arccos(np.dot(a[0,:],a[1:].T) / (np.linalg.norm(a[0,:],2)* np.linalg.norm(a[1:],2)))\n", 117 | " return np.abs(1 - float(angles[0])/float(np.pi/2))" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 16, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/plain": [ 128 | "0.896504454896407" 129 | ] 130 | }, 131 | "execution_count": 16, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "compare('kedah', 'kedah', kerajaan)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 17, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "0.923361608472873" 149 | ] 150 | }, 151 | "execution_count": 17, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "compare('kedah', 'dap', kerajaan)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 18, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "0.7958667334387592" 169 | ] 170 | }, 171 | "execution_count": 18, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | } 175 | ], 176 | "source": [ 177 | "compare('kedah', 'bn', kerajaan)" 178 | ] 179 | } 180 | ], 181 | "metadata": { 182 | "kernelspec": { 183 | "display_name": "Python 3", 184 | "language": "python", 185 | "name": "python3" 186 | }, 187 | "language_info": { 188 | "codemirror_mode": { 189 | "name": "ipython", 190 | "version": 3 191 | }, 192 | "file_extension": ".py", 193 | "mimetype": "text/x-python", 194 | "name": "python", 195 | "nbconvert_exporter": "python", 196 | "pygments_lexer": "ipython3", 197 | "version": "3.5.2" 198 | } 199 | }, 200 | "nbformat": 4, 201 | "nbformat_minor": 2 202 | } 203 | -------------------------------------------------------------------------------- /decomposition/nonnegative-matrix/topic-modelling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import re, random\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "with open('kerajaan','r') as fopen:\n", 20 | " kerajaan = list(filter(None, fopen.read().split('\\n')))" 21 | ] 22 | }, 23 | { 24 | "cell_type": 
"code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "def clearstring(string):\n", 30 | " string = re.sub('[^A-Za-z0-9 ]+', '', string)\n", 31 | " string = string.split(' ')\n", 32 | " string = filter(None, string)\n", 33 | " string = [y.strip() for y in string]\n", 34 | " string = ' '.join(string)\n", 35 | " return string.lower()\n", 36 | "\n", 37 | "kerajaan = [clearstring(i) for i in kerajaan]" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 5, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def penalty(M, mu):\n", 47 | " return np.where(M>=mu,0, np.min(M - mu, 0))\n", 48 | "\n", 49 | "def grads(M, W, H, lam, mu):\n", 50 | " R = W.dot(H) - M\n", 51 | " return R.dot(H.T) + penalty(W, mu)*lam, W.T.dot(R) + penalty(H, mu)*lam\n", 52 | "\n", 53 | "def upd(M, W, H, lr, lam, mu):\n", 54 | " dW,dH = grads(M,W,H,lam,mu)\n", 55 | " W -= lr*dW\n", 56 | " H -= lr*dH\n", 57 | " \n", 58 | "def tfidf(corpus):\n", 59 | " vocabulary = list(set(' '.join(corpus).split()))\n", 60 | " idf = {}\n", 61 | " for i in vocabulary:\n", 62 | " idf[i] = 0\n", 63 | " for k in corpus:\n", 64 | " if i in k.split():\n", 65 | " idf[i] += 1\n", 66 | " idf[i] = np.log(idf[i] / len(corpus))\n", 67 | " tfidf = np.zeros((len(corpus),len(vocabulary)))\n", 68 | " for no, i in enumerate(corpus):\n", 69 | " for text in i.split():\n", 70 | " tfidf[no, vocabulary.index(text)] += 1\n", 71 | " for text in i.split():\n", 72 | " tfidf[no, vocabulary.index(text)] = tfidf[no, vocabulary.index(text)] * idf[text]\n", 73 | " return vocabulary, tfidf\n", 74 | "\n", 75 | "def bow(corpus):\n", 76 | " vocabulary = list(set(' '.join(corpus).split()))\n", 77 | " bow = np.zeros((len(corpus),len(vocabulary)))\n", 78 | " for no, i in enumerate(corpus):\n", 79 | " for text in i.split():\n", 80 | " bow[no, vocabulary.index(text)] += 1\n", 81 | " return vocabulary, bow" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 7, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "def show_topics(corpus, count=10, k_words=10, use_tfidf=True, penalty=1e-6, learning_rate=1e-6,\n", 91 | " lam=1e3,epoch=50):\n", 92 | " if use_tfidf:\n", 93 | " vocab, vectors = tfidf(corpus)\n", 94 | " else:\n", 95 | " vocab, vectors = bow(corpus)\n", 96 | " print('vectors shape:',vectors.shape)\n", 97 | " m, n = vectors.shape\n", 98 | " W = np.abs(np.random.normal(scale=0.01, size=(m,count)))\n", 99 | " H = np.abs(np.random.normal(scale=0.01, size=(count,n)))\n", 100 | " for i in range(epoch):\n", 101 | " upd(vectors,W,H,learning_rate,lam,penalty)\n", 102 | " print('epoch:',i, W.min(), H.min())\n", 103 | " top_words = lambda t: [vocab[i] for i in np.argsort(t)[:-k_words-1:-1]]\n", 104 | " topic_words = ([top_words(t) for t in H])\n", 105 | " return [' '.join(t) for t in topic_words]" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 8, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "vectors shape: (6957, 16212)\n", 118 | "epoch: 0 -4.1044924381583935e-07 -2.90772193167458e-06\n", 119 | "epoch: 1 -7.8695576390063e-06 -6.253696457086038e-06\n", 120 | "epoch: 2 -3.4228037418379374e-05 -9.596089298466051e-06\n", 121 | "epoch: 3 -6.055980410656732e-05 -1.3333561301495036e-05\n", 122 | "epoch: 4 -8.686499916818119e-05 -2.0944640100188828e-05\n", 123 | "epoch: 5 -0.00011314376380983153 -2.8547641497971757e-05\n", 124 | "epoch: 6 -0.00013939623898103027 
-3.6142573639530124e-05\n", 125 | "epoch: 7 -0.00016562256537509216 -4.3729444662373384e-05\n", 126 | "epoch: 8 -0.0001918228834300349 -5.130826269686379e-05\n", 127 | "epoch: 9 -0.00021799733332947895 -5.887903586627732e-05\n", 128 | "epoch: 10 -0.0002441460550035473 -6.64417722868597e-05\n", 129 | "epoch: 11 -0.00027026918812976373 -7.39964800678866e-05\n", 130 | "epoch: 12 -0.0002963668721339562 -8.154316731171842e-05\n", 131 | "epoch: 13 -0.00032243924619115133 -8.908184211389151e-05\n", 132 | "epoch: 14 -0.0003484864492264663 -9.661251256313463e-05\n", 133 | "epoch: 15 -0.0003745086199160063 -0.00010413518674145421\n", 134 | "epoch: 16 -0.0004005058966877604 -0.00011164987272422302\n", 135 | "epoch: 17 -0.0004264784177225016 -0.0001279288940378729\n", 136 | "epoch: 18 -0.00045242632095455104 -0.00017022687973194478\n", 137 | "epoch: 19 -0.00047834974407267184 -0.00021248205910133673\n", 138 | "epoch: 20 -0.0005042488245209668 -0.0002546946584620727\n", 139 | "epoch: 21 -0.000530123699499772 -0.00029686490371796215\n", 140 | "epoch: 22 -0.0005559745059665528 -0.00033899302036178805\n", 141 | "epoch: 23 -0.0005818013806368022 -0.000381079233476807\n", 142 | "epoch: 24 -0.0006076044599849372 -0.0004231237677380898\n", 143 | "epoch: 25 -0.0006333838802451981 -0.0004651268474141948\n", 144 | "epoch: 26 -0.0006591397774125548 -0.0005070886963683779\n", 145 | "epoch: 27 -0.0006848722872435997 -0.0005490095380599262\n", 146 | "epoch: 28 -0.0007105815452574472 -0.0005908895955454773\n", 147 | "epoch: 29 -0.0007362676867366378 -0.0006327290914810896\n", 148 | "epoch: 30 -0.0007619308467280407 -0.0006745282481228718\n", 149 | "epoch: 31 -0.0007875711600437581 -0.000716287287328728\n", 150 | "epoch: 32 -0.0008173509272152328 -0.000758006430559504\n", 151 | "epoch: 33 -0.0008513582303096573 -0.0007996858988820443\n", 152 | "epoch: 34 -0.0008853325305552274 -0.0008413259129674747\n", 153 | "epoch: 35 -0.0009192739276010804 -0.0008829266930953532\n", 154 | "epoch: 36 -0.0009531825209330819 -0.0009244884591534525\n", 155 | "epoch: 37 -0.0009870584098742618 -0.0009660114306396433\n", 156 | "epoch: 38 -0.0010209016935852234 -0.001007495826662702\n", 157 | "epoch: 39 -0.00105471247106459 -0.0010489418659462102\n", 158 | "epoch: 40 -0.0010884908411494078 -0.0010903497668260733\n", 159 | "epoch: 41 -0.0011222369025156147 -0.0011317197472553127\n", 160 | "epoch: 42 -0.001155950753678441 -0.0011730520248030844\n", 161 | "epoch: 43 -0.0011896324929928487 -0.0012143468166576255\n", 162 | "epoch: 44 -0.0012232822186539999 -0.0012556043396279525\n", 163 | "epoch: 45 -0.0012569000286976662 -0.0012968248101398491\n", 164 | "epoch: 46 -0.001290486021000694 -0.0013380084442423039\n", 165 | "epoch: 47 -0.0013240402932814404 -0.0013791554576082045\n", 166 | "epoch: 48 -0.0013575629431002358 -0.0014202660655350163\n", 167 | "epoch: 49 -0.0013910540678598402 -0.001461340482947894\n" 168 | ] 169 | }, 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "['jpm acucomei tilawah aadkkedah singapore mnjadi matches gerus support sukacita',\n", 174 | " 'yef2016 chill bersinar saudara2 episod harmed rm26j pu3uampang bnm blake',\n", 175 | " 'cth peneraju two pemandngan fasal meru witnessing petrol tersebut triples',\n", 176 | " 'theyve buloh merbokjaguar nasionalfm wawrinka kepada harithiskander exert providing rencana',\n", 177 | " 'bistro prktanjongdatu betul ringankan pilatus pergigian february bakul ayaq rai',\n", 178 | " 'airforcenextgen peneraju beijingtianjin gulai perkuburan jwtn ummah sibu dvm funding',\n", 179 | " 
'peneraju cleanliness stiap ahead lembu pangkalan beserta computers insan denyutan',\n", 180 | " 'almari 754 cities programs 132 penyelenggaraan penerangan totally vivekananda simptom',\n", 181 | " 'declaration 012017 akademi jkpd card perlis ditetapkan 4852 dipandu sai',\n", 182 | " 'padi tudung didahului herman mbm jannahhishammuddinh2o tapah hartanah sebat jadilah']" 183 | ] 184 | }, 185 | "execution_count": 8, 186 | "metadata": {}, 187 | "output_type": "execute_result" 188 | } 189 | ], 190 | "source": [ 191 | "show_topics(kerajaan)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [] 200 | } 201 | ], 202 | "metadata": { 203 | "kernelspec": { 204 | "display_name": "Python 3", 205 | "language": "python", 206 | "name": "python3" 207 | }, 208 | "language_info": { 209 | "codemirror_mode": { 210 | "name": "ipython", 211 | "version": 3 212 | }, 213 | "file_extension": ".py", 214 | "mimetype": "text/x-python", 215 | "name": "python", 216 | "nbconvert_exporter": "python", 217 | "pygments_lexer": "ipython3", 218 | "version": "3.5.2" 219 | } 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 2 223 | } 224 | -------------------------------------------------------------------------------- /decomposition/tsne/animation-tsne-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/decomposition/tsne/animation-tsne-iris.gif -------------------------------------------------------------------------------- /decomposition/tsne/animation-tsne-perplexity-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/decomposition/tsne/animation-tsne-perplexity-iris.gif -------------------------------------------------------------------------------- /gradient-visualization/mae_plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import numpy as np 4 | sns.set() 5 | 6 | def subplot_evolution_strategies(step, learning_rate, sigma, population_size, 7 | x_boundary = 1, y_boundary = 2, 8 | step_x = 20, step_y = 50, midpoint = 0, ax=None): 9 | if ax is None: 10 | ax = plt.gca() 11 | x = np.linspace(-x_boundary,x_boundary,step_x) 12 | y = midpoint * x 13 | 14 | def mean_abs_error(theta): 15 | theta = np.atleast_2d(np.asarray(theta)) 16 | return np.mean(np.abs(y-hypothesis(x, theta)), axis=1) 17 | 18 | def hypothesis(x, theta): 19 | return theta * x 20 | 21 | theta_grid = np.linspace(-y_boundary,y_boundary,step_y) 22 | J_grid = mean_abs_error(theta_grid[:,np.newaxis]) 23 | 24 | ax.plot(theta_grid, J_grid) 25 | theta = [-y_boundary] 26 | J = [mean_abs_error(theta[0])[0]] 27 | strings = 'X-axis steps:\n\n' 28 | for j in range(step-1): 29 | last_theta = theta[-1] 30 | random_weight = np.random.randn(population_size, step_x) 31 | population = np.zeros(population_size) 32 | for l in range(population_size): 33 | w_try = last_theta + sigma * random_weight[l] 34 | population[l] = -mean_abs_error(w_try) 35 | A = (population - np.mean(population)) / np.std(population) 36 | current_theta = last_theta + learning_rate * np.mean((population_size * sigma) * np.dot(random_weight.T, A)) 37 | strings += str(current_theta) + '\n' 38 | theta.append(current_theta) 
39 |         J.append(mean_abs_error(current_theta)[0])
40 |     colors = sns.color_palette("husl", step)
41 |     for j in range(1,step):
42 |         ax.annotate('', xy=(theta[j], J[j]), xytext=(theta[j-1], J[j-1]), arrowprops={'arrowstyle': '->', 'color': 'r', 'lw': 1},va='center', ha='center')
43 |     ax.scatter(theta, J, c=colors, s=40, lw=0)
44 |     ax.set_xlabel(r'$\theta_1$')
45 |     ax.set_ylabel(r'$J(\theta_1)$')
46 |     ax.set_title('MAE function on Evolution Strategies')
47 |     return ax
48 | 
49 | def subplot_gradient_descent(step, learning_rate, technique,
50 |                              x_boundary = 1, y_boundary = 2,
51 |                              momentum = 0.9, rho = 0.9, epsilon = 1e-8,
52 |                              b1 = 0.9, b2 = 0.999,
53 |                              step_x = 20, step_y = 50, midpoint = 0, ax=None):
54 |     if ax is None:
55 |         ax = plt.gca()
56 |     x = np.linspace(-x_boundary,x_boundary,step_x)
57 |     y = midpoint * x
58 | 
59 |     def mean_abs_error(theta):
60 |         theta = np.atleast_2d(np.asarray(theta))
61 |         return np.mean(np.abs(y-hypothesis(x, theta)), axis=1)
62 | 
63 |     def hypothesis(x, theta):
64 |         return theta * x
65 | 
66 |     theta_grid = np.linspace(-y_boundary,y_boundary,step_y)
67 |     J_grid = mean_abs_error(theta_grid[:,np.newaxis])
68 | 
69 |     ax.plot(theta_grid, J_grid)
70 |     theta = [-y_boundary]
71 |     J = [mean_abs_error(theta[0])[0]]
72 |     strings = 'X-axis steps:\n\n'
73 |     velocity = np.zeros((1))
74 |     second_velocity = np.zeros((1))
75 | 
76 |     for j in range(step-1):
77 |         last_theta = theta[-1]
78 |         if technique == 'gradient descent':
79 |             gradient = np.sum(np.sign(hypothesis(x, last_theta) - y) * x)
80 |             current_theta = last_theta - learning_rate * gradient
81 |         elif technique == 'momentum':
82 |             gradient = np.sum(np.sign(hypothesis(x, last_theta) - y) * x)
83 |             velocity = velocity * momentum + learning_rate * gradient
84 |             current_theta = last_theta - velocity
85 |         elif technique == 'nesterov':
86 |             gradient = np.sum(np.sign(hypothesis(x, last_theta - momentum * velocity) - y) * x)
87 |             velocity = velocity * momentum + learning_rate * gradient
88 |             current_theta = last_theta - velocity
89 |         elif technique == 'adagrad':
90 |             gradient = np.sum(np.sign(hypothesis(x, last_theta) - y) * x)
91 |             velocity += np.square(gradient)
92 |             current_theta = last_theta - learning_rate * gradient / np.sqrt(velocity + epsilon)
93 |         elif technique == 'rmsprop':
94 |             gradient = np.sum(np.sign(hypothesis(x, last_theta) - y) * x)
95 |             velocity = rho * velocity + (1 - rho) * np.square(gradient)  # exponential moving average of squared gradients
96 |             current_theta = last_theta - learning_rate * gradient / np.sqrt(velocity + epsilon)
97 |         elif technique == 'adam':
98 |             gradient = np.sum(np.sign(hypothesis(x, last_theta) - y) * x)
99 |             velocity = b1 * velocity + (1-b1) * gradient  # first moment estimate
100 |             second_velocity = b2 * second_velocity + (1-b2) * np.square(gradient)  # second moment estimate
101 |             velocity_hat = velocity / (1-b1)
102 |             second_velocity_hat = second_velocity / (1-b2)
103 |             current_theta = last_theta - learning_rate * velocity_hat / np.sqrt(second_velocity_hat + epsilon)
104 |         else:
105 |             raise Exception('Invalid optimizer')
106 |         strings += str(current_theta) + '\n'
107 |         theta.append(current_theta)
108 |         J.append(mean_abs_error(current_theta)[0])
109 |     colors = sns.color_palette("husl", step)
110 |     for j in range(1,step):
111 |         ax.annotate('', xy=(theta[j], J[j]), xytext=(theta[j-1], J[j-1]), arrowprops={'arrowstyle': '->', 'color': 'r', 'lw': 1},va='center', ha='center')
112 |     ax.scatter(theta, J, c=colors, s=40, lw=0)
113 |     ax.set_xlabel(r'$\theta_1$')
114 |     ax.set_ylabel(r'$J(\theta_1)$')
115 |     ax.set_title('MAE function on %s Optimizer'%(technique))
116 |     return ax
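
Both helpers in `mae_plot.py` accept an optional `ax`, so the optimizers can be compared side by side on the same MAE curve. A minimal usage sketch (assuming it is run from the `gradient-visualization` folder with matplotlib and seaborn installed; the step count, learning rates and grid layout are illustrative, not taken from the repository):

```python
import matplotlib.pyplot as plt
from mae_plot import subplot_gradient_descent, subplot_evolution_strategies

# One panel per optimizer, all descending the same MAE surface J(theta_1).
techniques = ['gradient descent', 'momentum', 'nesterov', 'adagrad', 'rmsprop', 'adam']
fig, axes = plt.subplots(2, 4, figsize=(20, 8))
axes = axes.flatten()
for ax, technique in zip(axes, techniques):
    subplot_gradient_descent(step=10, learning_rate=0.1, technique=technique, ax=ax)

# Seventh panel: black-box evolution strategies on the same loss.
subplot_evolution_strategies(step=10, learning_rate=0.01, sigma=0.1,
                             population_size=20, ax=axes[6])
axes[7].axis('off')  # unused slot in the 2x4 grid
fig.tight_layout()
plt.show()
```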
-------------------------------------------------------------------------------- /gradient-visualization/mse_plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import numpy as np 4 | sns.set() 5 | 6 | def subplot_evolution_strategies(step, learning_rate, sigma, population_size, 7 | x_boundary = 1, y_boundary = 2, 8 | step_x = 20, step_y = 50, midpoint = 0, ax=None): 9 | if ax is None: 10 | ax = plt.gca() 11 | x = np.linspace(-x_boundary,x_boundary,step_x) 12 | y = midpoint * x 13 | 14 | def mean_square_error(theta): 15 | theta = np.atleast_2d(np.asarray(theta)) 16 | return np.mean((y-hypothesis(x, theta))**2, axis=1) 17 | 18 | def hypothesis(x, theta): 19 | return theta * x 20 | 21 | theta_grid = np.linspace(-y_boundary,y_boundary,step_y) 22 | J_grid = mean_square_error(theta_grid[:,np.newaxis]) 23 | 24 | ax.plot(theta_grid, J_grid) 25 | theta = [-y_boundary] 26 | J = [mean_square_error(theta[0])[0]] 27 | strings = 'X-axis steps:\n\n' 28 | for j in range(step-1): 29 | last_theta = theta[-1] 30 | random_weight = np.random.randn(population_size, step_x) 31 | population = np.zeros(population_size) 32 | for l in range(population_size): 33 | w_try = last_theta + sigma * random_weight[l] 34 | population[l] = -mean_square_error(w_try) 35 | A = (population - np.mean(population)) / np.std(population) 36 | current_theta = last_theta + learning_rate * np.mean((population_size * sigma) * np.dot(random_weight.T, A)) 37 | strings += str(current_theta) + '\n' 38 | theta.append(current_theta) 39 | J.append(mean_square_error(current_theta)[0]) 40 | colors = sns.color_palette("husl", step) 41 | for j in range(1,step): 42 | ax.annotate('', xy=(theta[j], J[j]), xytext=(theta[j-1], J[j-1]), arrowprops={'arrowstyle': '->', 'color': 'r', 'lw': 1},va='center', ha='center') 43 | ax.scatter(theta, J, c=colors, s=40, lw=0) 44 | ax.set_xlabel(r'$\theta_1$') 45 | ax.set_ylabel(r'$J(\theta_1)$') 46 | ax.set_title('MSE function on Evolution Strategies') 47 | return ax 48 | 49 | def subplot_gradient_descent(step, learning_rate, technique, 50 | x_boundary = 1, y_boundary = 2, 51 | momentum = 0.9, rho = 0.9, epsilon = 1e-8, 52 | b1 = 0.9, b2 = 0.999, 53 | step_x = 20, step_y = 50, midpoint = 0, ax=None): 54 | if ax is None: 55 | ax = plt.gca() 56 | x = np.linspace(-x_boundary,x_boundary,step_x) 57 | y = midpoint * x 58 | 59 | def mean_square_error(theta): 60 | theta = np.atleast_2d(np.asarray(theta)) 61 | return np.mean((y-hypothesis(x, theta))**2, axis=1) 62 | 63 | def hypothesis(x, theta): 64 | return theta * x 65 | 66 | theta_grid = np.linspace(-y_boundary,y_boundary,step_y) 67 | J_grid = mean_square_error(theta_grid[:,np.newaxis]) 68 | 69 | ax.plot(theta_grid, J_grid) 70 | theta = [-y_boundary] 71 | J = [mean_square_error(theta[0])[0]] 72 | strings = 'X-axis steps:\n\n' 73 | velocity = np.zeros((1)) 74 | second_velocity = np.zeros((1)) 75 | 76 | for j in range(step-1): 77 | last_theta = theta[-1] 78 | if technique == 'gradient descent': 79 | gradient = np.sum(2*(hypothesis(x, last_theta) - y) * x) 80 | current_theta = last_theta - learning_rate * gradient 81 | elif technique == 'momentum': 82 | gradient = np.sum(2*(hypothesis(x, last_theta) - y) * x) 83 | velocity = velocity * momentum + learning_rate * gradient 84 | current_theta = last_theta - velocity 85 | elif technique == 'nesterov': 86 | gradient = np.sum(2*(hypothesis(x, last_theta - momentum * velocity) - y) * x) 87 | velocity = velocity * momentum + 
learning_rate * gradient 88 | current_theta = last_theta - velocity 89 | elif technique == 'adagrad': 90 | gradient = np.sum(2*(hypothesis(x, last_theta) - y) * x) 91 | velocity += np.square(gradient) 92 | current_theta = last_theta - learning_rate * gradient / np.sqrt(velocity + epsilon) 93 | elif technique == 'rmsprop': 94 | gradient = np.sum(2*(hypothesis(x, last_theta) - y) * x) 95 | velocity += rho * velocity + (1 - rho) * np.square(gradient) 96 | current_theta = last_theta - learning_rate * gradient / np.sqrt(velocity + epsilon) 97 | elif technique == 'adam': 98 | gradient = np.sum(2*(hypothesis(x, last_theta) - y) * x) 99 | velocity += b1 * velocity + (1-b1) * gradient 100 | second_velocity += b2 * second_velocity + (1-b2) * np.square(gradient) 101 | velocity_hat = velocity / (1-b1) 102 | second_velocity_hat = second_velocity / (1-b2) 103 | current_theta = learning_rate * velocity_hat / np.sqrt(second_velocity_hat + epsilon) 104 | else: 105 | raise Exception('Invalid optimizer') 106 | strings += str(current_theta) + '\n' 107 | theta.append(current_theta) 108 | J.append(mean_square_error(current_theta)[0]) 109 | colors = sns.color_palette("husl", step) 110 | for j in range(1,step): 111 | ax.annotate('', xy=(theta[j], J[j]), xytext=(theta[j-1], J[j-1]), arrowprops={'arrowstyle': '->', 'color': 'r', 'lw': 1},va='center', ha='center') 112 | ax.scatter(theta, J, c=colors, s=40, lw=0) 113 | ax.set_xlabel(r'$\theta_1$') 114 | ax.set_ylabel(r'$J(\theta_1)$') 115 | ax.set_title('MSE function on %s Optimizer'%(technique)) 116 | return ax -------------------------------------------------------------------------------- /gradient-visualization/rmse_plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import numpy as np 4 | sns.set() 5 | 6 | def subplot_evolution_strategies(step, learning_rate, sigma, population_size, 7 | x_boundary = 1, y_boundary = 2, 8 | step_x = 20, step_y = 50, midpoint = 0, ax=None): 9 | if ax is None: 10 | ax = plt.gca() 11 | x = np.linspace(-x_boundary,x_boundary,step_x) 12 | y = midpoint * x 13 | 14 | def root_mean_square_error(theta): 15 | theta = np.atleast_2d(np.asarray(theta)) 16 | return np.sqrt(np.mean((y-hypothesis(x, theta))**2, axis=1)) 17 | 18 | def hypothesis(x, theta): 19 | return theta * x 20 | 21 | theta_grid = np.linspace(-y_boundary,y_boundary,step_y) 22 | J_grid = root_mean_square_error(theta_grid[:,np.newaxis]) 23 | 24 | ax.plot(theta_grid, J_grid) 25 | theta = [-y_boundary] 26 | J = [root_mean_square_error(theta[0])[0]] 27 | strings = 'X-axis steps:\n\n' 28 | for j in range(step-1): 29 | last_theta = theta[-1] 30 | random_weight = np.random.randn(population_size, step_x) 31 | population = np.zeros(population_size) 32 | for l in range(population_size): 33 | w_try = last_theta + sigma * random_weight[l] 34 | population[l] = -root_mean_square_error(w_try) 35 | A = (population - np.mean(population)) / np.std(population) 36 | current_theta = last_theta + learning_rate * np.mean((population_size * sigma) * np.dot(random_weight.T, A)) 37 | strings += str(current_theta) + '\n' 38 | theta.append(current_theta) 39 | J.append(root_mean_square_error(current_theta)[0]) 40 | colors = sns.color_palette("husl", step) 41 | for j in range(1,step): 42 | ax.annotate('', xy=(theta[j], J[j]), xytext=(theta[j-1], J[j-1]), arrowprops={'arrowstyle': '->', 'color': 'r', 'lw': 1},va='center', ha='center') 43 | ax.scatter(theta, J, c=colors, s=40, lw=0) 44 | 
ax.set_xlabel(r'$\theta_1$') 45 | ax.set_ylabel(r'$J(\theta_1)$') 46 | ax.set_title('RMSE function on Evolution Strategies') 47 | return ax 48 | 49 | def subplot_gradient_descent(step, learning_rate, technique, 50 | x_boundary = 1, y_boundary = 2, 51 | momentum = 0.9, rho = 0.9, epsilon = 1e-8, 52 | b1 = 0.9, b2 = 0.999, 53 | step_x = 20, step_y = 50, midpoint = 0, ax=None): 54 | if ax is None: 55 | ax = plt.gca() 56 | x = np.linspace(-x_boundary,x_boundary,step_x) 57 | y = midpoint * x 58 | 59 | def root_mean_square_error(theta): 60 | theta = np.atleast_2d(np.asarray(theta)) 61 | return np.sqrt(np.mean((y-hypothesis(x, theta))**2, axis=1)) 62 | 63 | def hypothesis(x, theta): 64 | return theta * x 65 | 66 | theta_grid = np.linspace(-y_boundary,y_boundary,step_y) 67 | J_grid = root_mean_square_error(theta_grid[:,np.newaxis]) 68 | 69 | ax.plot(theta_grid, J_grid) 70 | theta = [-y_boundary] 71 | J = [root_mean_square_error(theta[0])[0]] 72 | strings = 'X-axis steps:\n\n' 73 | velocity = np.zeros((1)) 74 | second_velocity = np.zeros((1)) 75 | 76 | for j in range(step-1): 77 | last_theta = theta[-1] 78 | if technique == 'gradient descent': 79 | gradient = np.sum(np.sign((hypothesis(x, last_theta) - y)) * x) 80 | current_theta = last_theta - learning_rate * gradient 81 | elif technique == 'momentum': 82 | gradient = np.sum(np.sign((hypothesis(x, last_theta) - y)) * x) 83 | velocity = velocity * momentum + learning_rate * gradient 84 | current_theta = last_theta - velocity 85 | elif technique == 'nesterov': 86 | gradient = np.sum(np.sign((hypothesis(x, last_theta - momentum * velocity) - y)) * x) 87 | velocity = velocity * momentum + learning_rate * gradient 88 | current_theta = last_theta - velocity 89 | elif technique == 'adagrad': 90 | gradient = np.sum(np.sign((hypothesis(x, last_theta) - y)) * x) 91 | velocity += np.square(gradient) 92 | current_theta = last_theta - learning_rate * gradient / np.sqrt(velocity + epsilon) 93 | elif technique == 'rmsprop': 94 | gradient = np.sum(((hypothesis(x, last_theta) - y)) * x) 95 | velocity += rho * velocity + (1 - rho) * np.square(gradient) 96 | current_theta = last_theta - learning_rate * gradient / np.sqrt(velocity + epsilon) 97 | elif technique == 'adam': 98 | gradient = np.sum(np.sign((hypothesis(x, last_theta) - y)) * x) 99 | velocity += b1 * velocity + (1-b1) * gradient 100 | second_velocity += b2 * second_velocity + (1-b2) * np.square(gradient) 101 | velocity_hat = velocity / (1-b1) 102 | second_velocity_hat = second_velocity / (1-b2) 103 | current_theta = learning_rate * velocity_hat / np.sqrt(second_velocity_hat + epsilon) 104 | else: 105 | raise Exception('Invalid optimizer') 106 | strings += str(current_theta) + '\n' 107 | theta.append(current_theta) 108 | J.append(root_mean_square_error(current_theta)[0]) 109 | colors = sns.color_palette("husl", step) 110 | for j in range(1,step): 111 | ax.annotate('', xy=(theta[j], J[j]), xytext=(theta[j-1], J[j-1]), arrowprops={'arrowstyle': '->', 'color': 'r', 'lw': 1},va='center', ha='center') 112 | ax.scatter(theta, J, c=colors, s=40, lw=0) 113 | ax.set_xlabel(r'$\theta_1$') 114 | ax.set_ylabel(r'$J(\theta_1)$') 115 | ax.set_title('RMSE function on %s Optimizer'%(technique)) 116 | return ax -------------------------------------------------------------------------------- /neural-network/batch-normalization/animation-batch-normalization-iris.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/neural-network/batch-normalization/animation-batch-normalization-iris.gif -------------------------------------------------------------------------------- /neural-network/convolutional/README.md: -------------------------------------------------------------------------------- 1 | ## How to train in your local machine 2 | 3 | 1. Unzip audio.zip to current directory 4 | 2. change any location string to your own directory 5 | ```python 6 | # please change to yours 7 | audio_location = os.listdir('/home/husein/Desktop/convolutional-neural-network/audio') 8 | ``` 9 | 10 | ### You can read more implementation of conv2d [here](https://gist.github.com/huseinzol05/472d51d356b88195d1d00984cb774559) 11 | ### You can read more implementation of atrous2d [here](https://gist.github.com/huseinzol05/c1ef7ed1c7e3afcfe14bcc361689dfba) 12 | -------------------------------------------------------------------------------- /neural-network/convolutional/atrous1d-loop/atrous-1d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "What is atrous convolution? atrous is a french word, means hole.\n", 8 | "\n", 9 | "![alt text](http://liangchiehchen.com/fig/deeplab_aspp.jpg)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 6, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 7, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "array([ 0., 0., 0.])" 30 | ] 31 | }, 32 | "execution_count": 7, 33 | "metadata": {}, 34 | "output_type": "execute_result" 35 | } 36 | ], 37 | "source": [ 38 | "x = np.zeros((3))\n", 39 | "rate = 2\n", 40 | "x" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 8, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "array([ 1., 1., 1., 1., 1.])" 52 | ] 53 | }, 54 | "execution_count": 8, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": [ 60 | "atrous = np.ones(np.array(x.shape) + rate)\n", 61 | "atrous" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 9, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "array([ 0., 1., 0., 1., 0.])" 73 | ] 74 | }, 75 | "execution_count": 9, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "for i in range(0, atrous.shape[0], rate //2+1):\n", 82 | " atrous[i] = atrous[i] * x[int(i/rate/2)+1]\n", 83 | "atrous" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 10, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "x = np.random.rand(1,7,3)\n", 93 | "kernel = np.random.rand(3,3,7)\n", 94 | "filter_size = kernel.shape[0]\n", 95 | "stride = 2\n", 96 | "rate = 2" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 11, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "def padding(x, filter_size, pad='SAME'):\n", 106 | " if pad == 'SAME':\n", 107 | " pad_h_min = int(np.floor((filter_size - 1)/2))\n", 108 | " pad_h_max = int(np.ceil((filter_size - 1)/2))\n", 109 | " pad_h = (pad_h_min, pad_h_max)\n", 110 | " return np.pad(x, ((0, 0), pad_h, (0, 0)), mode='constant')\n", 111 | " else:\n", 112 | " 
return x\n", 113 | " \n", 114 | "def get_shape(x):\n", 115 | " output_height = int(np.ceil((x.shape[1] - rate * (filter_size-1)) / stride) + 1)\n", 116 | " return int(output_height)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 12, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "(1, 4, 7)" 128 | ] 129 | }, 130 | "execution_count": 12, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "x_padded = padding(x, filter_size)\n", 137 | "h = get_shape(x_padded)\n", 138 | "out_atrous = np.zeros((1, h, kernel.shape[2]))\n", 139 | "out_atrous.shape" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 15, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "def atrous(x, w):\n", 149 | " for i in range(0, x.shape[0], rate //2+1):\n", 150 | " x[i,:] = x[i,:] * w[int(i/rate/2)+1,:]\n", 151 | " return x\n", 152 | "\n", 153 | "def conv(x, w, out):\n", 154 | " for k in range(x.shape[0]):\n", 155 | " for z in range(w.shape[2]):\n", 156 | " h_range = int(np.ceil((x.shape[1] - rate * (filter_size-1)) / stride) + 1)\n", 157 | " for _h in range(h_range):\n", 158 | " atroused = atrous(x[k, _h * stride:_h * stride + filter_size + rate, :], w[:, :, z])\n", 159 | " out[k, _h, z] = np.sum(atroused)\n", 160 | " return out" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 17, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "(1, 4, 7)" 172 | ] 173 | }, 174 | "execution_count": 17, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "out_atrous = conv(x_padded, kernel, out_atrous)\n", 181 | "out_atrous.shape" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 18, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "def deatrous_w(x, w, de):\n", 191 | " for i in range(0, x.shape[0], rate //2+1):\n", 192 | " w[int(i/rate/2)+1,:] = np.sum(x[i,:] * de[i,:])\n", 193 | " return w\n", 194 | "\n", 195 | "def deconv_w(x, w, de):\n", 196 | " for k in range(x.shape[0]):\n", 197 | " for z in range(w.shape[2]):\n", 198 | " h_range = int(np.ceil((x.shape[1] - rate * (filter_size-1)) / stride) + 1)\n", 199 | " for _h in range(h_range):\n", 200 | " weighted = deatrous_w(x[k, _h * stride:_h * stride + filter_size + rate, :], w[:, :, z],\n", 201 | " de[k, _h * stride:_h * stride + filter_size + rate, :])\n", 202 | " w[:, :, z] = weighted\n", 203 | " return w\n", 204 | "\n", 205 | "def deconv_x(x, w, de):\n", 206 | " for k in range(x.shape[0]):\n", 207 | " for z in range(x.shape[2]):\n", 208 | " h_range = int(np.ceil((x.shape[1] - rate * (filter_size-1)) / stride) + 1)\n", 209 | " for _h in range(h_range):\n", 210 | " atroused = atrous(de[k, _h * stride:_h * stride + filter_size + rate, :], w[:, z, :])\n", 211 | " x[k, _h, z] = np.sum(atroused)\n", 212 | " return x" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 19, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "data": { 222 | "text/plain": [ 223 | "(3, 3, 7)" 224 | ] 225 | }, 226 | "execution_count": 19, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "dkernel = np.zeros(kernel.shape)\n", 233 | "deconv_w(out_atrous, dkernel, out_atrous).shape" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 20, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 
242 | "data": { 243 | "text/plain": [ 244 | "(1, 7, 3)" 245 | ] 246 | }, 247 | "execution_count": 20, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "dx = np.zeros(x.shape)\n", 254 | "deconv_x(dx, kernel, out_atrous).shape" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [] 263 | } 264 | ], 265 | "metadata": { 266 | "kernelspec": { 267 | "display_name": "Python 3", 268 | "language": "python", 269 | "name": "python3" 270 | }, 271 | "language_info": { 272 | "codemirror_mode": { 273 | "name": "ipython", 274 | "version": 3 275 | }, 276 | "file_extension": ".py", 277 | "mimetype": "text/x-python", 278 | "name": "python", 279 | "nbconvert_exporter": "python", 280 | "pygments_lexer": "ipython3", 281 | "version": "3.5.2" 282 | } 283 | }, 284 | "nbformat": 4, 285 | "nbformat_minor": 2 286 | } 287 | -------------------------------------------------------------------------------- /neural-network/convolutional/atrous2d-loop/atrous-2d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "What is atrous convolution? atrous is a french word, means hole.\n", 8 | "\n", 9 | "![alt text](http://liangchiehchen.com/fig/deeplab_aspp.jpg)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "array([[ 0., 0., 0.],\n", 30 | " [ 0., 0., 0.],\n", 31 | " [ 0., 0., 0.]])" 32 | ] 33 | }, 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "x = np.zeros((3,3))\n", 41 | "rate = 2\n", 42 | "x" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "array([[ 1., 1., 1., 1., 1.],\n", 54 | " [ 1., 1., 1., 1., 1.],\n", 55 | " [ 1., 1., 1., 1., 1.],\n", 56 | " [ 1., 1., 1., 1., 1.],\n", 57 | " [ 1., 1., 1., 1., 1.]])" 58 | ] 59 | }, 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "atrous = np.ones(np.array(x.shape) + rate)\n", 67 | "atrous" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "array([[ 0., 1., 0., 1., 0.],\n", 79 | " [ 1., 1., 1., 1., 1.],\n", 80 | " [ 0., 1., 0., 1., 0.],\n", 81 | " [ 1., 1., 1., 1., 1.],\n", 82 | " [ 0., 1., 0., 1., 0.]])" 83 | ] 84 | }, 85 | "execution_count": 4, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "for i in range(0, atrous.shape[0], rate //2+1):\n", 92 | " for k in range(0, atrous.shape[1], rate // 2+1):\n", 93 | " atrous[i,k] = atrous[i,k] * x[int(i/rate/2)+1,int(k/rate/2)+1]\n", 94 | "atrous" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 6, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "x = np.random.rand(1,7,7,3)\n", 104 | "kernel = np.random.rand(3,3,3,7)\n", 105 | "filter_size = kernel.shape[0]\n", 106 | "stride = 2\n", 107 | "rate = 2" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 
112 | "execution_count": 8, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "def padding(x, filter_size, pad='SAME'):\n", 117 | " if pad == 'SAME':\n", 118 | " pad_h_min = int(np.floor((filter_size - 1)/2))\n", 119 | " pad_h_max = int(np.ceil((filter_size - 1)/2))\n", 120 | " pad_w_min = int(np.floor((filter_size - 1)/2))\n", 121 | " pad_w_max = int(np.ceil((filter_size - 1)/2))\n", 122 | " pad_h, pad_w = (pad_h_min, pad_h_max), (pad_w_min, pad_w_max)\n", 123 | " return np.pad(x, ((0, 0), pad_h, pad_w, (0, 0)), mode='constant')\n", 124 | " else:\n", 125 | " return x\n", 126 | " \n", 127 | "def get_shape(x):\n", 128 | " output_height = int(np.ceil((x.shape[1] - rate * (filter_size-1)) / stride) + 1)\n", 129 | " output_width = int(np.ceil((x.shape[2] - rate * (filter_size-1)) / stride) + 1)\n", 130 | " return int(output_height), int(output_width)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 9, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/plain": [ 141 | "(1, 4, 4, 7)" 142 | ] 143 | }, 144 | "execution_count": 9, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "x_padded = padding(x, filter_size)\n", 151 | "h, w = get_shape(x_padded)\n", 152 | "out_atrous = np.zeros((1, h, w, kernel.shape[3]))\n", 153 | "out_atrous.shape" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 11, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "def atrous(x, w):\n", 163 | " for i in range(0, x.shape[0], rate //2+1):\n", 164 | " for k in range(0, x.shape[1], rate // 2+1):\n", 165 | " x[i,k,:] = x[i,k,:] * w[int(i/rate/2)+1,int(k/rate/2)+1,:]\n", 166 | " return x\n", 167 | "\n", 168 | "def conv(x, w, out):\n", 169 | " for k in range(x.shape[0]):\n", 170 | " for z in range(w.shape[3]):\n", 171 | " h_range = int(np.ceil((x.shape[1] - rate * (filter_size-1)) / stride) + 1)\n", 172 | " for _h in range(h_range):\n", 173 | " w_range = int(np.ceil((x.shape[2] - rate * (filter_size-1)) / stride) + 1)\n", 174 | " for _w in range(w_range):\n", 175 | " atroused = atrous(x[k, \n", 176 | " _h * stride:_h * stride + filter_size + rate, \n", 177 | " _w * stride:_w * stride + filter_size + rate, :],\n", 178 | " w[:, :, :, z])\n", 179 | " out[k, _h, _w, z] = np.sum(atroused)\n", 180 | " return out" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 15, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "out_atrous = conv(x_padded, kernel, out_atrous)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 33, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "def deatrous_w(x, w, de):\n", 199 | " for i in range(0, x.shape[0], rate //2+1):\n", 200 | " for k in range(0, x.shape[1], rate // 2+1):\n", 201 | " w[int(i/rate/2)+1,int(k/rate/2)+1,:] = np.sum(x[i,k,:] * de[i,k,:])\n", 202 | " return w\n", 203 | "\n", 204 | "def deconv_w(x, w, de):\n", 205 | " for k in range(x.shape[0]):\n", 206 | " for z in range(w.shape[3]):\n", 207 | " h_range = int(np.ceil((x.shape[1] - rate * (filter_size-1)) / stride) + 1)\n", 208 | " for _h in range(h_range):\n", 209 | " w_range = int(np.ceil((x.shape[2] - rate * (filter_size-1)) / stride) + 1)\n", 210 | " for _w in range(w_range):\n", 211 | " weighted = deatrous_w(x[k, \n", 212 | " _h * stride:_h * stride + filter_size + rate, \n", 213 | " _w * stride:_w * stride + filter_size + rate, :],\n", 214 | " w[:, :, :, z],\n", 215 | " de[k, \n", 216 | 
" _h * stride:_h * stride + filter_size + rate, \n", 217 | " _w * stride:_w * stride + filter_size + rate, :])\n", 218 | " w[:, :, :, z] = weighted\n", 219 | " return w\n", 220 | "\n", 221 | "def deconv_x(x, w, de):\n", 222 | " for k in range(x.shape[0]):\n", 223 | " for z in range(x.shape[3]):\n", 224 | " h_range = int(np.ceil((x.shape[1] - rate * (filter_size-1)) / stride) + 1)\n", 225 | " for _h in range(h_range):\n", 226 | " w_range = int(np.ceil((x.shape[2] - rate * (filter_size-1)) / stride) + 1)\n", 227 | " for _w in range(w_range):\n", 228 | " atroused = atrous(de[k, \n", 229 | " _h * stride:_h * stride + filter_size + rate, \n", 230 | " _w * stride:_w * stride + filter_size + rate, :], w[:, :, z, :])\n", 231 | " x[k, _h, _w, z] = np.sum(atroused)\n", 232 | " return x" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 27, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "data": { 242 | "text/plain": [ 243 | "(3, 3, 3, 7)" 244 | ] 245 | }, 246 | "execution_count": 27, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "dkernel = np.zeros(kernel.shape)\n", 253 | "deconv_w(out_atrous, dkernel, out_atrous).shape" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 36, 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/plain": [ 264 | "(1, 7, 7, 3)" 265 | ] 266 | }, 267 | "execution_count": 36, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "dx = np.zeros(x.shape)\n", 274 | "deconv_x(dx, kernel, out_atrous).shape" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [] 283 | } 284 | ], 285 | "metadata": { 286 | "kernelspec": { 287 | "display_name": "Python 3", 288 | "language": "python", 289 | "name": "python3" 290 | }, 291 | "language_info": { 292 | "codemirror_mode": { 293 | "name": "ipython", 294 | "version": 3 295 | }, 296 | "file_extension": ".py", 297 | "mimetype": "text/x-python", 298 | "name": "python", 299 | "nbconvert_exporter": "python", 300 | "pygments_lexer": "ipython3", 301 | "version": "3.5.2" 302 | } 303 | }, 304 | "nbformat": 4, 305 | "nbformat_minor": 2 306 | } 307 | -------------------------------------------------------------------------------- /neural-network/convolutional/audio.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/neural-network/convolutional/audio.zip -------------------------------------------------------------------------------- /neural-network/convolutional/avgpooling1d/avgpooling1d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "nf = 3 # number of filters\n", 19 | "rf = 3 # filter size\n", 20 | "stride = 2\n", 21 | "x = np.random.randn(1, 7, 3)\n", 22 | "out = np.zeros((1, 3, nf))" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "for k in range(x.shape[0]):\n", 32 | " for z in range(nf):\n", 33 | " h_range = 
int((x.shape[1] - rf) / stride) + 1\n", 34 | " for _h in range(h_range):\n", 35 | " out[k, _h, z] = np.mean(x[k, _h * stride:_h * stride + rf, :])" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 4, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# we assumed de = out\n", 45 | "dx = np.zeros((x.shape))" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 5, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "for k in range(x.shape[0]):\n", 55 | " for z in range(nf):\n", 56 | " h_range = int((x.shape[1] - rf) / stride) + 1\n", 57 | " for _h in range(h_range):\n", 58 | " dx[k, _h * stride:_h * stride + rf, :] = out[k, _h, z]" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 6, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "array([[[ 0.20745764, 0.20745764, 0.20745764],\n", 70 | " [ 0.20745764, 0.20745764, 0.20745764],\n", 71 | " [ 0.02002697, 0.02002697, 0.02002697],\n", 72 | " [ 0.02002697, 0.02002697, 0.02002697],\n", 73 | " [-0.5881302 , -0.5881302 , -0.5881302 ],\n", 74 | " [-0.5881302 , -0.5881302 , -0.5881302 ],\n", 75 | " [-0.5881302 , -0.5881302 , -0.5881302 ]]])" 76 | ] 77 | }, 78 | "execution_count": 6, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "dx" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 7, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "(1, 7, 3)" 96 | ] 97 | }, 98 | "execution_count": 7, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "dx.shape" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [] 113 | } 114 | ], 115 | "metadata": { 116 | "kernelspec": { 117 | "display_name": "Python 3", 118 | "language": "python", 119 | "name": "python3" 120 | }, 121 | "language_info": { 122 | "codemirror_mode": { 123 | "name": "ipython", 124 | "version": 3 125 | }, 126 | "file_extension": ".py", 127 | "mimetype": "text/x-python", 128 | "name": "python", 129 | "nbconvert_exporter": "python", 130 | "pygments_lexer": "ipython3", 131 | "version": "3.5.2" 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 2 136 | } 137 | -------------------------------------------------------------------------------- /neural-network/convolutional/avgpooling2d/avgpooling2d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "nf = 3 # number of filters\n", 19 | "rf = 3 # filter size\n", 20 | "stride = 2\n", 21 | "x = np.random.randn(1, 7, 7, 3)\n", 22 | "out = np.zeros((1, 3, 3, nf))" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "for k in range(x.shape[0]):\n", 32 | " for z in range(nf):\n", 33 | " h_range = int((x.shape[1] - rf) / stride) + 1\n", 34 | " for _h in range(h_range):\n", 35 | " w_range = int((x.shape[2] - rf) / stride) + 1\n", 36 | " for _w in range(w_range):\n", 37 | " out[k, _h, _w, z] = np.mean(x[k, _h * stride:_h * stride + rf, _w * stride:_w * stride + rf, :])" 38 | ] 39 | 
}, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# we assumed de = out\n", 47 | "dx = np.zeros((x.shape))" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 5, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "for k in range(x.shape[0]):\n", 57 | " for z in range(nf):\n", 58 | " h_range = int((x.shape[1] - rf) / stride) + 1\n", 59 | " for _h in range(h_range):\n", 60 | " w_range = int((x.shape[2] - rf) / stride) + 1\n", 61 | " for _w in range(w_range):\n", 62 | " dx[k, _h * stride:_h * stride + rf, _w * stride:_w * stride + rf, :] = out[k, _h, _w, z]" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 6, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "array([[[[-0.15306527, -0.15306527, -0.15306527],\n", 74 | " [-0.15306527, -0.15306527, -0.15306527],\n", 75 | " [-0.03723158, -0.03723158, -0.03723158],\n", 76 | " [-0.03723158, -0.03723158, -0.03723158],\n", 77 | " [-0.08554885, -0.08554885, -0.08554885],\n", 78 | " [-0.08554885, -0.08554885, -0.08554885],\n", 79 | " [-0.08554885, -0.08554885, -0.08554885]],\n", 80 | "\n", 81 | " [[-0.15306527, -0.15306527, -0.15306527],\n", 82 | " [-0.15306527, -0.15306527, -0.15306527],\n", 83 | " [-0.03723158, -0.03723158, -0.03723158],\n", 84 | " [-0.03723158, -0.03723158, -0.03723158],\n", 85 | " [-0.08554885, -0.08554885, -0.08554885],\n", 86 | " [-0.08554885, -0.08554885, -0.08554885],\n", 87 | " [-0.08554885, -0.08554885, -0.08554885]],\n", 88 | "\n", 89 | " [[ 0.18894809, 0.18894809, 0.18894809],\n", 90 | " [ 0.18894809, 0.18894809, 0.18894809],\n", 91 | " [ 0.23032554, 0.23032554, 0.23032554],\n", 92 | " [ 0.23032554, 0.23032554, 0.23032554],\n", 93 | " [ 0.03334547, 0.03334547, 0.03334547],\n", 94 | " [ 0.03334547, 0.03334547, 0.03334547],\n", 95 | " [ 0.03334547, 0.03334547, 0.03334547]],\n", 96 | "\n", 97 | " [[ 0.18894809, 0.18894809, 0.18894809],\n", 98 | " [ 0.18894809, 0.18894809, 0.18894809],\n", 99 | " [ 0.23032554, 0.23032554, 0.23032554],\n", 100 | " [ 0.23032554, 0.23032554, 0.23032554],\n", 101 | " [ 0.03334547, 0.03334547, 0.03334547],\n", 102 | " [ 0.03334547, 0.03334547, 0.03334547],\n", 103 | " [ 0.03334547, 0.03334547, 0.03334547]],\n", 104 | "\n", 105 | " [[ 0.48257824, 0.48257824, 0.48257824],\n", 106 | " [ 0.48257824, 0.48257824, 0.48257824],\n", 107 | " [ 0.39374719, 0.39374719, 0.39374719],\n", 108 | " [ 0.39374719, 0.39374719, 0.39374719],\n", 109 | " [ 0.08488663, 0.08488663, 0.08488663],\n", 110 | " [ 0.08488663, 0.08488663, 0.08488663],\n", 111 | " [ 0.08488663, 0.08488663, 0.08488663]],\n", 112 | "\n", 113 | " [[ 0.48257824, 0.48257824, 0.48257824],\n", 114 | " [ 0.48257824, 0.48257824, 0.48257824],\n", 115 | " [ 0.39374719, 0.39374719, 0.39374719],\n", 116 | " [ 0.39374719, 0.39374719, 0.39374719],\n", 117 | " [ 0.08488663, 0.08488663, 0.08488663],\n", 118 | " [ 0.08488663, 0.08488663, 0.08488663],\n", 119 | " [ 0.08488663, 0.08488663, 0.08488663]],\n", 120 | "\n", 121 | " [[ 0.48257824, 0.48257824, 0.48257824],\n", 122 | " [ 0.48257824, 0.48257824, 0.48257824],\n", 123 | " [ 0.39374719, 0.39374719, 0.39374719],\n", 124 | " [ 0.39374719, 0.39374719, 0.39374719],\n", 125 | " [ 0.08488663, 0.08488663, 0.08488663],\n", 126 | " [ 0.08488663, 0.08488663, 0.08488663],\n", 127 | " [ 0.08488663, 0.08488663, 0.08488663]]]])" 128 | ] 129 | }, 130 | "execution_count": 6, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | 
"source": [ 136 | "dx" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 3", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 3 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython3", 163 | "version": "3.5.2" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 2 168 | } 169 | -------------------------------------------------------------------------------- /neural-network/convolutional/maxpooling1d/maxpooling1d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "nf = 3 # number of filters\n", 19 | "rf = 3 # filter size\n", 20 | "stride = 2\n", 21 | "x = np.random.randn(1, 7, 3)\n", 22 | "out = np.zeros((1, 3, nf))\n", 23 | "out_argmax = np.zeros((1, 3, nf))" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "for k in range(x.shape[0]):\n", 33 | " for z in range(nf):\n", 34 | " h_range = int((x.shape[1] - rf) / stride) + 1\n", 35 | " for _h in range(h_range):\n", 36 | " out[k, _h, z] = np.amax(x[k, _h * stride:_h * stride + rf, :])\n", 37 | " out_argmax[k, _h, z] = np.argmax(x[k, _h * stride:_h * stride + rf, :])" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# we assumed de = out\n", 47 | "dx = np.zeros((x.shape))" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 5, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "for k in range(x.shape[0]):\n", 57 | " for z in range(nf):\n", 58 | " h_range = int((x.shape[1] - rf) / stride) + 1\n", 59 | " for _h in range(h_range):\n", 60 | " dx[k, _h * stride:_h * stride + rf, :].flat[int(out_argmax[k, _h, z])] = out[k, _h, z]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 6, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "array([[[ 0. , 0. , 0. ],\n", 72 | " [ 0. , 0. , 0. ],\n", 73 | " [ 0. , 3.05578356, 0. ],\n", 74 | " [ 0. , 0. , 0. ],\n", 75 | " [ 1.73467803, 0. , 0. ],\n", 76 | " [ 0. , 0. , 0. ],\n", 77 | " [ 0. , 0. , 0. 
]]])" 78 | ] 79 | }, 80 | "execution_count": 6, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "dx" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [] 95 | } 96 | ], 97 | "metadata": { 98 | "kernelspec": { 99 | "display_name": "Python 3", 100 | "language": "python", 101 | "name": "python3" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 3 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython3", 113 | "version": "3.5.2" 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 2 118 | } 119 | -------------------------------------------------------------------------------- /neural-network/convolutional/maxpooling2d/maxpooling2d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "nf = 3 # number of filters\n", 19 | "rf = 3 # filter size\n", 20 | "stride = 2\n", 21 | "x = np.random.randn(1, 7, 7, 3)\n", 22 | "out = np.zeros((1, 3, 3, nf))\n", 23 | "out_argmax = np.zeros((1, 3, 3, nf))" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "for k in range(x.shape[0]):\n", 33 | " for z in range(nf):\n", 34 | " h_range = int((x.shape[1] - rf) / stride) + 1\n", 35 | " for _h in range(h_range):\n", 36 | " w_range = int((x.shape[2] - rf) / stride) + 1\n", 37 | " for _w in range(w_range):\n", 38 | " out[k, _h, _w, z] = np.amax(x[k, _h * stride:_h * stride + rf, _w * stride:_w * stride + rf, :])\n", 39 | " out_argmax[k, _h, _w, z] = np.argmax(x[k, _h * stride:_h * stride + rf, _w * stride:_w * stride + rf, :])" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 37, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# we assumed de = out\n", 49 | "dx = np.zeros((x.shape))" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 38, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "for k in range(x.shape[0]):\n", 59 | " for z in range(nf):\n", 60 | " h_range = int((x.shape[1] - rf) / stride) + 1\n", 61 | " for _h in range(h_range):\n", 62 | " w_range = int((x.shape[2] - rf) / stride) + 1\n", 63 | " for _w in range(w_range):\n", 64 | " dx[k, _h * stride:_h * stride + rf, _w * stride:_w * stride + rf, :].flat[int(out_argmax[k, _h, _w, z])] = out[k, _h, _w, z]" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 39, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/plain": [ 75 | "array([[[[ 0. , 0. , 0. ],\n", 76 | " [ 0. , 0. , 0. ],\n", 77 | " [ 0. , 1.48059643, 0. ],\n", 78 | " [ 0. , 0. , 0. ],\n", 79 | " [ 0. , 0. , 0. ],\n", 80 | " [ 0. , 0. , 0. ],\n", 81 | " [ 0. , 0. , 0. ]],\n", 82 | "\n", 83 | " [[ 0. , 0. , 0. ],\n", 84 | " [ 0. , 0. , 0. ],\n", 85 | " [ 0. , 0. , 0. ],\n", 86 | " [ 0. , 0. , 0. ],\n", 87 | " [ 0. , 0. , 0. ],\n", 88 | " [ 0. , 0. , 0. ],\n", 89 | " [ 0. , 0. , 2.45955209]],\n", 90 | "\n", 91 | " [[ 0. , 0. , 0. ],\n", 92 | " [ 0. , 0. , 0. ],\n", 93 | " [ 0. 
, 1.40093241, 0. ],\n", 94 | " [ 0. , 0. , 0. ],\n", 95 | " [ 0. , 0. , 0. ],\n", 96 | " [ 0. , 0. , 0. ],\n", 97 | " [ 0. , 0. , 0. ]],\n", 98 | "\n", 99 | " [[ 0. , 0. , 0. ],\n", 100 | " [ 0. , 0. , 0. ],\n", 101 | " [ 0. , 0. , 0. ],\n", 102 | " [ 0. , 0. , 0. ],\n", 103 | " [ 1.90659004, 0. , 0. ],\n", 104 | " [ 0. , 0. , 0. ],\n", 105 | " [ 0. , 0. , 0. ]],\n", 106 | "\n", 107 | " [[ 0. , 0. , 0. ],\n", 108 | " [ 0. , 0. , 0. ],\n", 109 | " [ 0. , 0. , 0. ],\n", 110 | " [ 0. , 0. , 0. ],\n", 111 | " [ 0. , 0. , 0. ],\n", 112 | " [ 0. , 2.08293146, 0. ],\n", 113 | " [ 0. , 0. , 0. ]],\n", 114 | "\n", 115 | " [[ 0. , 0. , 0. ],\n", 116 | " [ 0. , 0. , 0. ],\n", 117 | " [ 0. , 0. , 0. ],\n", 118 | " [ 0. , 0. , 0. ],\n", 119 | " [ 0. , 0. , 0. ],\n", 120 | " [ 0. , 0. , 0. ],\n", 121 | " [ 0. , 0. , 0. ]],\n", 122 | "\n", 123 | " [[ 0. , 0. , 0. ],\n", 124 | " [ 0. , 0. , 0. ],\n", 125 | " [ 0. , 3.11563315, 0. ],\n", 126 | " [ 0. , 0. , 0. ],\n", 127 | " [ 0. , 0. , 0. ],\n", 128 | " [ 0. , 0. , 0. ],\n", 129 | " [ 0. , 0. , 0. ]]]])" 130 | ] 131 | }, 132 | "execution_count": 39, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "dx" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [] 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 3", 152 | "language": "python", 153 | "name": "python3" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 3 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython3", 165 | "version": "3.5.2" 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 2 170 | } 171 | -------------------------------------------------------------------------------- /neural-network/dropout/dropout/animation-dropout-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/neural-network/dropout/dropout/animation-dropout-iris.gif -------------------------------------------------------------------------------- /neural-network/evolution-strategy/evolution_strategy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | 4 | class Deep_Evolution_Strategy: 5 | 6 | def __init__(self, weights, inputs, solutions, reward_function, population_size, sigma, learning_rate): 7 | self.weights = weights.copy() 8 | self.inputs = inputs 9 | self.solutions = solutions 10 | self.reward_function = reward_function 11 | self.population_size = population_size 12 | self.sigma = sigma 13 | self.learning_rate = learning_rate 14 | 15 | def _get_weight_from_population(self, population): 16 | weights_population = [] 17 | for index, i in enumerate(population): 18 | jittered = self.sigma * i 19 | weights_population.append(self.weights[index] + jittered) 20 | return weights_population 21 | 22 | def get_weight(self): 23 | return self.weights 24 | 25 | def predict(self, weights, inputs, activation_function = None): 26 | if activation_function[0]: 27 | w = activation_function[0](inputs * weights[0]) 28 | else: 29 | w = inputs * weights[0] 30 | for n in range(1, len(weights)): 31 | if activation_function[n]: 32 | w = activation_function[n](np.dot(w, 
weights[n])) 33 | else: 34 | w = np.dot(w, weights[n]) 35 | return w 36 | 37 | def train(self, epoch = 100, print_every = 5, activation_function = None): 38 | lasttime = time.time() 39 | for i in range(epoch): 40 | population = [] 41 | rewards = np.zeros(self.population_size) 42 | for k in range(self.population_size): 43 | x = [] 44 | for w in self.weights: 45 | x.append(np.random.randn(*w.shape)) 46 | population.append(x) 47 | for k in range(self.population_size): 48 | weights_population = self._get_weight_from_population(population[k]) 49 | w = self.predict(weights_population, self.inputs, activation_function = activation_function) 50 | rewards[k] = self.reward_function(self.solutions, w) 51 | rewards = (rewards - np.mean(rewards)) / np.std(rewards) 52 | for index, w in enumerate(self.weights): 53 | A = np.array([p[index] for p in population]) 54 | self.weights[index] = w + self.learning_rate/(self.population_size * self.sigma) * np.dot(A.T, rewards).T 55 | if (i+1) % print_every == 0: 56 | w = self.predict(self.weights, self.inputs, activation_function = activation_function) 57 | print('iter %d. reward: %f' % (i+1, self.reward_function(self.solutions, w))) 58 | print('time taken to train:', time.time()-lasttime, 'seconds') -------------------------------------------------------------------------------- /neural-network/evolution-strategy/function.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def mean_square_error(solution, w): 4 | return -np.mean(np.square(solution - w)) 5 | 6 | def root_mean_square_error(solution, w): 7 | return -np.sqrt(np.mean(np.square(solution - w))) 8 | 9 | def mean_absolute_error(solution, w): 10 | return -np.mean(np.abs(solution - w)) 11 | 12 | def cross_entropy(solution, w): 13 | return np.mean(solution * np.log(w)) 14 | 15 | def sigmoid(x): 16 | return 1 / (1 + np.exp(-x)) 17 | 18 | def softmax(w): 19 | exp_scores = np.exp(w) 20 | return exp_scores / np.sum(exp_scores, axis=1, keepdims=True) -------------------------------------------------------------------------------- /neural-network/feed-forward/softmax-entropy-gradientdescent/function.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def mean_square_error(solution, w): 4 | return -np.mean(np.square(solution - w)) 5 | 6 | def root_mean_square_error(solution, w): 7 | return -np.sqrt(np.mean(np.square(solution - w))) 8 | 9 | def mean_absolute_error(solution, w): 10 | return -np.mean(np.abs(solution - w)) 11 | 12 | def cross_entropy(solution, w): 13 | return np.mean(solution * np.log(w)) 14 | 15 | def sigmoid(x): 16 | return 1 / (1 + np.exp(-x)) 17 | 18 | def softmax(w): 19 | exp_scores = np.exp(w) 20 | return exp_scores / np.sum(exp_scores, axis=1, keepdims=True) -------------------------------------------------------------------------------- /neural-network/feed-forward/softmax-entropy-momentum/function.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def mean_square_error(solution, w): 4 | return -np.mean(np.square(solution - w)) 5 | 6 | def root_mean_square_error(solution, w): 7 | return -np.sqrt(np.mean(np.square(solution - w))) 8 | 9 | def mean_absolute_error(solution, w): 10 | return -np.mean(np.abs(solution - w)) 11 | 12 | def cross_entropy(solution, w): 13 | return np.mean(solution * np.log(w)) 14 | 15 | def sigmoid(x): 16 | return 1 / (1 + np.exp(-x)) 17 | 18 | def softmax(w): 19 | exp_scores = 
np.exp(w) 20 | return exp_scores / np.sum(exp_scores, axis=1, keepdims=True) -------------------------------------------------------------------------------- /neural-network/gated-recurrent-unit-RNN/README.md: -------------------------------------------------------------------------------- 1 | ### All models train on predicting character sequence from consumer.h 2 | -------------------------------------------------------------------------------- /neural-network/gated-recurrent-unit-RNN/adagrad/gru-rnn-adagrad.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import seaborn as sns\n", 12 | "import random\n", 13 | "import time\n", 14 | "sns.set()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "def get_vocab(file, lower = False):\n", 24 | " with open(file, 'r') as fopen:\n", 25 | " data = fopen.read()\n", 26 | " if lower:\n", 27 | " data = data.lower()\n", 28 | " vocab = list(set(data))\n", 29 | " return data, vocab\n", 30 | "\n", 31 | "def embed_to_onehot(data, vocab):\n", 32 | " onehot = np.zeros((len(data), len(vocab)), dtype = np.float32)\n", 33 | " for i in range(len(data)):\n", 34 | " onehot[i, vocab.index(data[i])] = 1.0\n", 35 | " return onehot" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "text, text_vocab = get_vocab('consumer.h', lower = False)\n", 45 | "onehot = embed_to_onehot(text, text_vocab)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "learning_rate = 0.01\n", 55 | "batch_size = 64\n", 56 | "sequence_length = 12\n", 57 | "epoch = 1000\n", 58 | "num_layers = 2\n", 59 | "size_layer = 128\n", 60 | "possible_batch_id = range(len(text) - sequence_length - 1)\n", 61 | "dimension = onehot.shape[1]\n", 62 | "epsilon = 1e-8\n", 63 | "\n", 64 | "U = np.random.randn(size_layer, dimension) / np.sqrt(size_layer)\n", 65 | "U_g = np.zeros(U.shape)\n", 66 | "Wz = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 67 | "Wz_g = np.zeros(Wz.shape)\n", 68 | "Wr = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 69 | "Wr_g = np.zeros(Wr.shape)\n", 70 | "Wh = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 71 | "Wh_g = np.zeros(Wh.shape)\n", 72 | "V = np.random.randn(dimension, size_layer) / np.sqrt(dimension)\n", 73 | "V_g = np.zeros(V.shape)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 5, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "def tanh(x, grad=False):\n", 83 | " if grad:\n", 84 | " output = np.tanh(x)\n", 85 | " return (1.0 - np.square(output))\n", 86 | " else:\n", 87 | " return np.tanh(x)\n", 88 | " \n", 89 | "def sigmoid(x, grad=False):\n", 90 | " if grad:\n", 91 | " return sigmoid(x) * (1 - sigmoid(x))\n", 92 | " else:\n", 93 | " return 1 / (1 + np.exp(-x))\n", 94 | " \n", 95 | "def softmax(x):\n", 96 | " exp_scores = np.exp(x - np.max(x))\n", 97 | " return exp_scores / (np.sum(exp_scores, axis=1, keepdims=True) + 1e-8)\n", 98 | "\n", 99 | "def derivative_softmax_cross_entropy(x, y):\n", 100 | " delta = softmax(x)\n", 101 | " delta[range(X.shape[0]), y] -= 1\n", 102 | " 
return delta\n", 103 | "\n", 104 | "def forward_multiply_gate(w, x):\n", 105 | " return np.dot(w, x)\n", 106 | "\n", 107 | "def backward_multiply_gate(w, x, dz):\n", 108 | " dW = np.dot(dz.T, x)\n", 109 | " dx = np.dot(w.T, dz.T)\n", 110 | " return dW, dx\n", 111 | "\n", 112 | "def forward_add_gate(x1, x2):\n", 113 | " return x1 + x2\n", 114 | "\n", 115 | "def backward_add_gate(x1, x2, dz):\n", 116 | " dx1 = dz * np.ones_like(x1)\n", 117 | " dx2 = dz * np.ones_like(x2)\n", 118 | " return dx1, dx2\n", 119 | "\n", 120 | "def cross_entropy(Y_hat, Y, epsilon=1e-12):\n", 121 | " Y_hat = np.clip(Y_hat, epsilon, 1. - epsilon)\n", 122 | " N = Y_hat.shape[0]\n", 123 | " return -np.sum(np.sum(Y * np.log(Y_hat+1e-9))) / N\n", 124 | "\n", 125 | "def forward_recurrent(x, h_state, U, Wz, Wr, Wh, V):\n", 126 | " mul_u = forward_multiply_gate(x, U.T)\n", 127 | " mul_Wz = forward_multiply_gate(h_state, Wz.T)\n", 128 | " add_Wz = forward_add_gate(mul_u, mul_Wz)\n", 129 | " z = sigmoid(add_Wz)\n", 130 | " mul_Wr = forward_multiply_gate(h_state, Wr.T)\n", 131 | " add_Wr = forward_add_gate(mul_u, mul_Wr)\n", 132 | " r = sigmoid(add_Wr)\n", 133 | " mul_Wh = forward_multiply_gate(h_state * r, Wh.T)\n", 134 | " add_Wh = forward_add_gate(mul_u, mul_Wh)\n", 135 | " h_hat = tanh(add_Wh)\n", 136 | " h = (1 - z) * h_state + z * h_hat\n", 137 | " mul_v = forward_multiply_gate(h, V.T)\n", 138 | " return (mul_u, mul_Wz, add_Wz, z, mul_Wr, add_Wr, r, mul_Wh, add_Wh, h_hat, h, mul_v)\n", 139 | "\n", 140 | "def backward_recurrent(x, h_state, U, Wz, Wr, Wh, V, d_mul_v, saved_graph):\n", 141 | " mul_u, mul_Wz, add_Wz, z, mul_Wr, add_Wr, r, mul_Wh, add_Wh, h_hat, h, mul_v = saved_graph\n", 142 | " dV, dh = backward_multiply_gate(V, h, d_mul_v)\n", 143 | " dh_hat = z * dh.T\n", 144 | " dadd_Wh = tanh(add_Wh, True) * dh_hat\n", 145 | " dmul_u1, dmul_Wh = backward_add_gate(mul_u, mul_Wh, dadd_Wh)\n", 146 | " dWh, dprev_state = backward_multiply_gate(Wh, h_state * r, dmul_Wh)\n", 147 | " dr = dprev_state * h_state.T\n", 148 | " dadd_Wr = sigmoid(add_Wr, True) * dr.T\n", 149 | " dmul_u2, dmul_Wr = backward_add_gate(mul_u, mul_Wr, dadd_Wr)\n", 150 | " dWr, dprev_state = backward_multiply_gate(Wr, h_state, dmul_Wr)\n", 151 | " dz = -h_state + h_hat\n", 152 | " dadd_Wz = sigmoid(add_Wz, True) * dz\n", 153 | " dmul_u3, dmul_Wz = backward_add_gate(mul_u, mul_Wz, dadd_Wz)\n", 154 | " dWz, dprev_state = backward_multiply_gate(Wz, h_state, dmul_Wz)\n", 155 | " dU, dx = backward_multiply_gate(U, x, dmul_u3)\n", 156 | " return (dU, dWz, dWr, dWh, dV)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 6, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "name": "stdout", 166 | "output_type": "stream", 167 | "text": [ 168 | "epoch 50, loss 3.637528, accuracy 0.098958\n", 169 | "epoch 100, loss 3.612660, accuracy 0.105469\n", 170 | "epoch 150, loss 3.536577, accuracy 0.096354\n", 171 | "epoch 200, loss 3.491436, accuracy 0.084635\n", 172 | "epoch 250, loss 3.463406, accuracy 0.091146\n", 173 | "epoch 300, loss 3.535961, accuracy 0.092448\n", 174 | "epoch 350, loss 3.412024, accuracy 0.108073\n", 175 | "epoch 400, loss 3.481816, accuracy 0.093750\n", 176 | "epoch 450, loss 3.494790, accuracy 0.082031\n", 177 | "epoch 500, loss 3.433272, accuracy 0.096354\n", 178 | "epoch 550, loss 3.441682, accuracy 0.102865\n", 179 | "epoch 600, loss 3.535025, accuracy 0.100260\n", 180 | "epoch 650, loss 3.453496, accuracy 0.115885\n", 181 | "epoch 700, loss 3.548778, accuracy 0.091146\n", 182 | "epoch 750, loss 3.530042, 
accuracy 0.085938\n", 183 | "epoch 800, loss 3.489041, accuracy 0.088542\n", 184 | "epoch 850, loss 3.553946, accuracy 0.109375\n", 185 | "epoch 900, loss 3.455542, accuracy 0.088542\n", 186 | "epoch 950, loss 3.572077, accuracy 0.074219\n", 187 | "epoch 1000, loss 3.530765, accuracy 0.078125\n" 188 | ] 189 | } 190 | ], 191 | "source": [ 192 | "for i in range(epoch):\n", 193 | " batch_x = np.zeros((batch_size, sequence_length, dimension))\n", 194 | " batch_y = np.zeros((batch_size, sequence_length, dimension))\n", 195 | " batch_id = random.sample(possible_batch_id, batch_size)\n", 196 | " prev_h = np.zeros((batch_size, size_layer))\n", 197 | " for n in range(sequence_length):\n", 198 | " id1 = [k + n for k in batch_id]\n", 199 | " id2 = [k + n + 1 for k in batch_id]\n", 200 | " batch_x[:,n,:] = onehot[id1, :]\n", 201 | " batch_y[:,n,:] = onehot[id2, :]\n", 202 | " layers = []\n", 203 | " out_logits = np.zeros((batch_size, sequence_length, dimension))\n", 204 | " for n in range(sequence_length):\n", 205 | " layers.append(forward_recurrent(batch_x[:,n,:], prev_h, U, Wz, Wr, Wh, V))\n", 206 | " prev_h = layers[-1][-2]\n", 207 | " out_logits[:, n, :] = layers[-1][-1]\n", 208 | " probs = softmax(out_logits.reshape((-1, dimension)))\n", 209 | " y = np.argmax(batch_y.reshape((-1, dimension)),axis=1)\n", 210 | " accuracy = np.mean(np.argmax(probs,axis=1) == y)\n", 211 | " loss = cross_entropy(probs, batch_y.reshape((-1, dimension)))\n", 212 | " delta = probs\n", 213 | " delta[range(y.shape[0]), y] -= 1\n", 214 | " delta = delta.reshape((batch_size, sequence_length, dimension))\n", 215 | " dU = np.zeros(U.shape)\n", 216 | " dV = np.zeros(V.shape)\n", 217 | " dWz = np.zeros(Wz.shape)\n", 218 | " dWr = np.zeros(Wr.shape)\n", 219 | " dWh = np.zeros(Wh.shape)\n", 220 | " prev_h = np.zeros((batch_size, size_layer))\n", 221 | " for n in range(sequence_length):\n", 222 | " d_mul_v = delta[:, n, :]\n", 223 | " dU_t, dWz_t, dWr_t, dWh_t, dV_t = backward_recurrent(batch_x[:,n,:], prev_h, \n", 224 | " U, Wz, Wr, Wh, V, d_mul_v, layers[n])\n", 225 | " prev_h = layers[n][-2]\n", 226 | " dU += dU_t\n", 227 | " dV += dV_t\n", 228 | " dWz += dWz_t\n", 229 | " dWr += dWr_t\n", 230 | " dWh += dWh_t\n", 231 | " U_g += dU ** 2\n", 232 | " U += -learning_rate * dU / np.sqrt(U_g + epsilon)\n", 233 | " V_g += dV ** 2\n", 234 | " V += -learning_rate * dV / np.sqrt(V_g + epsilon)\n", 235 | " Wz_g += dWz ** 2\n", 236 | " Wz += -learning_rate * dWz / np.sqrt(Wz_g + epsilon)\n", 237 | " Wr_g += dWr ** 2\n", 238 | " Wr += -learning_rate * dWr / np.sqrt(Wr_g + epsilon)\n", 239 | " Wh_g += dWh ** 2\n", 240 | " Wh += -learning_rate * dWh / np.sqrt(Wh_g + epsilon)\n", 241 | " if (i+1) % 50 == 0:\n", 242 | " print('epoch %d, loss %f, accuracy %f'%(i+1, loss, accuracy))" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [] 251 | } 252 | ], 253 | "metadata": { 254 | "kernelspec": { 255 | "display_name": "Python 3", 256 | "language": "python", 257 | "name": "python3" 258 | }, 259 | "language_info": { 260 | "codemirror_mode": { 261 | "name": "ipython", 262 | "version": 3 263 | }, 264 | "file_extension": ".py", 265 | "mimetype": "text/x-python", 266 | "name": "python", 267 | "nbconvert_exporter": "python", 268 | "pygments_lexer": "ipython3", 269 | "version": "3.5.2" 270 | } 271 | }, 272 | "nbformat": 4, 273 | "nbformat_minor": 2 274 | } 275 | -------------------------------------------------------------------------------- 
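Since the notebook above only trains the GRU, a short generation sketch can make the learned weights tangible. This is hypothetical code, not in the repository: it assumes `U`, `Wz`, `Wr`, `Wh`, `V`, `text`, `text_vocab`, `dimension`, `size_layer`, `forward_recurrent` and `softmax` are still in scope from the adagrad notebook, and it decodes greedily.

```python
import numpy as np

def sample(seed_char, length=200):
    # Run the trained GRU one character at a time, feeding each prediction back in.
    h = np.zeros((1, size_layer))
    x = np.zeros((1, dimension))
    x[0, text_vocab.index(seed_char)] = 1.0
    generated = [seed_char]
    for _ in range(length):
        graph = forward_recurrent(x, h, U, Wz, Wr, Wh, V)
        h, logits = graph[-2], graph[-1]        # hidden state and output logits
        probs = softmax(logits)
        idx = int(np.argmax(probs, axis=1)[0])  # greedy decoding
        generated.append(text_vocab[idx])
        x = np.zeros((1, dimension))
        x[0, idx] = 1.0
    return ''.join(generated)

print(sample(text[0]))
```

Replacing the greedy `argmax` with `np.random.choice(dimension, p=probs[0] / probs[0].sum())` gives more varied samples at the cost of occasional noise.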
/neural-network/gated-recurrent-unit-RNN/gradient-descent/gru-rnn-gradient-descent.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import seaborn as sns\n", 12 | "import random\n", 13 | "import time\n", 14 | "sns.set()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "def get_vocab(file, lower = False):\n", 24 | " with open(file, 'r') as fopen:\n", 25 | " data = fopen.read()\n", 26 | " if lower:\n", 27 | " data = data.lower()\n", 28 | " vocab = list(set(data))\n", 29 | " return data, vocab\n", 30 | "\n", 31 | "def embed_to_onehot(data, vocab):\n", 32 | " onehot = np.zeros((len(data), len(vocab)), dtype = np.float32)\n", 33 | " for i in range(len(data)):\n", 34 | " onehot[i, vocab.index(data[i])] = 1.0\n", 35 | " return onehot" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "text, text_vocab = get_vocab('consumer.h', lower = False)\n", 45 | "onehot = embed_to_onehot(text, text_vocab)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "learning_rate = 0.001\n", 55 | "batch_size = 64\n", 56 | "sequence_length = 12\n", 57 | "epoch = 1000\n", 58 | "num_layers = 2\n", 59 | "size_layer = 128\n", 60 | "possible_batch_id = range(len(text) - sequence_length - 1)\n", 61 | "dimension = onehot.shape[1]\n", 62 | "\n", 63 | "U = np.random.randn(size_layer, dimension) / np.sqrt(size_layer)\n", 64 | "Wz = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 65 | "Wr = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 66 | "Wh = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 67 | "V = np.random.randn(dimension, size_layer) / np.sqrt(dimension)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 5, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "def tanh(x, grad=False):\n", 77 | " if grad:\n", 78 | " output = np.tanh(x)\n", 79 | " return (1.0 - np.square(output))\n", 80 | " else:\n", 81 | " return np.tanh(x)\n", 82 | " \n", 83 | "def sigmoid(x, grad=False):\n", 84 | " if grad:\n", 85 | " return sigmoid(x) * (1 - sigmoid(x))\n", 86 | " else:\n", 87 | " return 1 / (1 + np.exp(-x))\n", 88 | " \n", 89 | "def softmax(x):\n", 90 | " exp_scores = np.exp(x - np.max(x))\n", 91 | " return exp_scores / (np.sum(exp_scores, axis=1, keepdims=True) + 1e-8)\n", 92 | "\n", 93 | "def derivative_softmax_cross_entropy(x, y):\n", 94 | " delta = softmax(x)\n", 95 | " delta[range(X.shape[0]), y] -= 1\n", 96 | " return delta\n", 97 | "\n", 98 | "def forward_multiply_gate(w, x):\n", 99 | " return np.dot(w, x)\n", 100 | "\n", 101 | "def backward_multiply_gate(w, x, dz):\n", 102 | " dW = np.dot(dz.T, x)\n", 103 | " dx = np.dot(w.T, dz.T)\n", 104 | " return dW, dx\n", 105 | "\n", 106 | "def forward_add_gate(x1, x2):\n", 107 | " return x1 + x2\n", 108 | "\n", 109 | "def backward_add_gate(x1, x2, dz):\n", 110 | " dx1 = dz * np.ones_like(x1)\n", 111 | " dx2 = dz * np.ones_like(x2)\n", 112 | " return dx1, dx2\n", 113 | "\n", 114 | "def cross_entropy(Y_hat, Y, epsilon=1e-12):\n", 115 | " Y_hat = np.clip(Y_hat, epsilon, 1. 
- epsilon)\n", 116 | " N = Y_hat.shape[0]\n", 117 | " return -np.sum(np.sum(Y * np.log(Y_hat+1e-9))) / N\n", 118 | "\n", 119 | "def forward_recurrent(x, h_state, U, Wz, Wr, Wh, V):\n", 120 | " mul_u = forward_multiply_gate(x, U.T)\n", 121 | " mul_Wz = forward_multiply_gate(h_state, Wz.T)\n", 122 | " add_Wz = forward_add_gate(mul_u, mul_Wz)\n", 123 | " z = sigmoid(add_Wz)\n", 124 | " mul_Wr = forward_multiply_gate(h_state, Wr.T)\n", 125 | " add_Wr = forward_add_gate(mul_u, mul_Wr)\n", 126 | " r = sigmoid(add_Wr)\n", 127 | " mul_Wh = forward_multiply_gate(h_state * r, Wh.T)\n", 128 | " add_Wh = forward_add_gate(mul_u, mul_Wh)\n", 129 | " h_hat = tanh(add_Wh)\n", 130 | " h = (1 - z) * h_state + z * h_hat\n", 131 | " mul_v = forward_multiply_gate(h, V.T)\n", 132 | " return (mul_u, mul_Wz, add_Wz, z, mul_Wr, add_Wr, r, mul_Wh, add_Wh, h_hat, h, mul_v)\n", 133 | "\n", 134 | "def backward_recurrent(x, h_state, U, Wz, Wr, Wh, V, d_mul_v, saved_graph):\n", 135 | " mul_u, mul_Wz, add_Wz, z, mul_Wr, add_Wr, r, mul_Wh, add_Wh, h_hat, h, mul_v = saved_graph\n", 136 | " dV, dh = backward_multiply_gate(V, h, d_mul_v)\n", 137 | " dh_hat = z * dh.T\n", 138 | " dadd_Wh = tanh(add_Wh, True) * dh_hat\n", 139 | " dmul_u1, dmul_Wh = backward_add_gate(mul_u, mul_Wh, dadd_Wh)\n", 140 | " dWh, dprev_state = backward_multiply_gate(Wh, h_state * r, dmul_Wh)\n", 141 | " dr = dprev_state * h_state.T\n", 142 | " dadd_Wr = sigmoid(add_Wr, True) * dr.T\n", 143 | " dmul_u2, dmul_Wr = backward_add_gate(mul_u, mul_Wr, dadd_Wr)\n", 144 | " dWr, dprev_state = backward_multiply_gate(Wr, h_state, dmul_Wr)\n", 145 | " dz = -h_state + h_hat\n", 146 | " dadd_Wz = sigmoid(add_Wz, True) * dz\n", 147 | " dmul_u3, dmul_Wz = backward_add_gate(mul_u, mul_Wz, dadd_Wz)\n", 148 | " dWz, dprev_state = backward_multiply_gate(Wz, h_state, dmul_Wz)\n", 149 | " dU, dx = backward_multiply_gate(U, x, dmul_u3)\n", 150 | " return (dU, dWz, dWr, dWh, dV)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 6, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stdout", 160 | "output_type": "stream", 161 | "text": [ 162 | "epoch 50, loss 4.220839, accuracy 0.223958\n", 163 | "epoch 100, loss 4.151175, accuracy 0.196615\n", 164 | "epoch 150, loss 3.968643, accuracy 0.158854\n", 165 | "epoch 200, loss 4.017142, accuracy 0.191406\n", 166 | "epoch 250, loss 4.093218, accuracy 0.182292\n", 167 | "epoch 300, loss 4.048298, accuracy 0.134115\n", 168 | "epoch 350, loss 3.878078, accuracy 0.130208\n", 169 | "epoch 400, loss 3.772771, accuracy 0.085938\n", 170 | "epoch 450, loss 3.726045, accuracy 0.106771\n", 171 | "epoch 500, loss 3.694129, accuracy 0.109375\n", 172 | "epoch 550, loss 3.727348, accuracy 0.075521\n", 173 | "epoch 600, loss 3.521524, accuracy 0.095052\n", 174 | "epoch 650, loss 3.571376, accuracy 0.106771\n", 175 | "epoch 700, loss 3.573480, accuracy 0.085938\n", 176 | "epoch 750, loss 3.590403, accuracy 0.123698\n", 177 | "epoch 800, loss 3.614294, accuracy 0.111979\n", 178 | "epoch 850, loss 3.564591, accuracy 0.123698\n", 179 | "epoch 900, loss 3.591091, accuracy 0.111979\n", 180 | "epoch 950, loss 3.543047, accuracy 0.105469\n", 181 | "epoch 1000, loss 3.542467, accuracy 0.102865\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "for i in range(epoch):\n", 187 | " batch_x = np.zeros((batch_size, sequence_length, dimension))\n", 188 | " batch_y = np.zeros((batch_size, sequence_length, dimension))\n", 189 | " batch_id = random.sample(possible_batch_id, batch_size)\n", 190 | " prev_h = 
np.zeros((batch_size, size_layer))\n", 191 | " for n in range(sequence_length):\n", 192 | " id1 = [k + n for k in batch_id]\n", 193 | " id2 = [k + n + 1 for k in batch_id]\n", 194 | " batch_x[:,n,:] = onehot[id1, :]\n", 195 | " batch_y[:,n,:] = onehot[id2, :]\n", 196 | " layers = []\n", 197 | " out_logits = np.zeros((batch_size, sequence_length, dimension))\n", 198 | " for n in range(sequence_length):\n", 199 | " layers.append(forward_recurrent(batch_x[:,n,:], prev_h, U, Wz, Wr, Wh, V))\n", 200 | " prev_h = layers[-1][-2]\n", 201 | " out_logits[:, n, :] = layers[-1][-1]\n", 202 | " probs = softmax(out_logits.reshape((-1, dimension)))\n", 203 | " y = np.argmax(batch_y.reshape((-1, dimension)),axis=1)\n", 204 | " accuracy = np.mean(np.argmax(probs,axis=1) == y)\n", 205 | " loss = cross_entropy(probs, batch_y.reshape((-1, dimension)))\n", 206 | " delta = probs\n", 207 | " delta[range(y.shape[0]), y] -= 1\n", 208 | " delta = delta.reshape((batch_size, sequence_length, dimension))\n", 209 | " dU = np.zeros(U.shape)\n", 210 | " dV = np.zeros(V.shape)\n", 211 | " dWz = np.zeros(Wz.shape)\n", 212 | " dWr = np.zeros(Wr.shape)\n", 213 | " dWh = np.zeros(Wh.shape)\n", 214 | " prev_h = np.zeros((batch_size, size_layer))\n", 215 | " for n in range(sequence_length):\n", 216 | " d_mul_v = delta[:, n, :]\n", 217 | " dU_t, dWz_t, dWr_t, dWh_t, dV_t = backward_recurrent(batch_x[:,n,:], prev_h, \n", 218 | " U, Wz, Wr, Wh, V, d_mul_v, layers[n])\n", 219 | " prev_h = layers[n][-2]\n", 220 | " dU += dU_t\n", 221 | " dV += dV_t\n", 222 | " dWz += dWz_t\n", 223 | " dWr += dWr_t\n", 224 | " dWh += dWh_t\n", 225 | " U -= learning_rate * dU\n", 226 | " V -= learning_rate * dV\n", 227 | " Wz -= learning_rate * dWz\n", 228 | " Wr -= learning_rate * dWr\n", 229 | " Wh -= learning_rate * dWh\n", 230 | " if (i+1) % 50 == 0:\n", 231 | " print('epoch %d, loss %f, accuracy %f'%(i+1, loss, accuracy))" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [] 240 | } 241 | ], 242 | "metadata": { 243 | "kernelspec": { 244 | "display_name": "Python 3", 245 | "language": "python", 246 | "name": "python3" 247 | }, 248 | "language_info": { 249 | "codemirror_mode": { 250 | "name": "ipython", 251 | "version": 3 252 | }, 253 | "file_extension": ".py", 254 | "mimetype": "text/x-python", 255 | "name": "python", 256 | "nbconvert_exporter": "python", 257 | "pygments_lexer": "ipython3", 258 | "version": "3.5.2" 259 | } 260 | }, 261 | "nbformat": 4, 262 | "nbformat_minor": 2 263 | } 264 | -------------------------------------------------------------------------------- /neural-network/long-short-term-RNN/README.md: -------------------------------------------------------------------------------- 1 | ### All models train on predicting character sequence from consumer.h 2 | -------------------------------------------------------------------------------- /neural-network/regularizarion/animation-l1-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/neural-network/regularizarion/animation-l1-iris.gif -------------------------------------------------------------------------------- /neural-network/regularizarion/animation-l1l2-iris.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/neural-network/regularizarion/animation-l1l2-iris.gif -------------------------------------------------------------------------------- /neural-network/regularizarion/animation-l2-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/neural-network/regularizarion/animation-l2-iris.gif -------------------------------------------------------------------------------- /neural-network/vanilla-RNN/README.md: -------------------------------------------------------------------------------- 1 | ### All models train on predicting character sequence from consumer.h 2 | -------------------------------------------------------------------------------- /neural-network/vanilla-RNN/adagrad/vanilla-rnn-adagrad.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import seaborn as sns\n", 12 | "import random\n", 13 | "import time\n", 14 | "sns.set()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "def get_vocab(file, lower = False):\n", 24 | " with open(file, 'r') as fopen:\n", 25 | " data = fopen.read()\n", 26 | " if lower:\n", 27 | " data = data.lower()\n", 28 | " vocab = list(set(data))\n", 29 | " return data, vocab\n", 30 | "\n", 31 | "def embed_to_onehot(data, vocab):\n", 32 | " onehot = np.zeros((len(data), len(vocab)), dtype = np.float32)\n", 33 | " for i in range(len(data)):\n", 34 | " onehot[i, vocab.index(data[i])] = 1.0\n", 35 | " return onehot" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "text, text_vocab = get_vocab('consumer.h', lower = False)\n", 45 | "onehot = embed_to_onehot(text, text_vocab)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "learning_rate = 0.01\n", 55 | "batch_size = 64\n", 56 | "sequence_length = 12\n", 57 | "epoch = 1000\n", 58 | "num_layers = 2\n", 59 | "size_layer = 128\n", 60 | "possible_batch_id = range(len(text) - sequence_length - 1)\n", 61 | "dimension = onehot.shape[1]\n", 62 | "epsilon = 1e-8\n", 63 | "\n", 64 | "U = np.random.randn(size_layer, dimension) / np.sqrt(size_layer)\n", 65 | "U_g = np.zeros(U.shape)\n", 66 | "W = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 67 | "W_g = np.zeros(W.shape)\n", 68 | "V = np.random.randn(dimension, size_layer) / np.sqrt(dimension)\n", 69 | "V_g = np.zeros(V.shape)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "def tanh(x, grad=False):\n", 79 | " if grad:\n", 80 | " output = np.tanh(x)\n", 81 | " return (1.0 - np.square(output))\n", 82 | " else:\n", 83 | " return np.tanh(x)\n", 84 | " \n", 85 | "def softmax(x):\n", 86 | " exp_scores = np.exp(x - np.max(x))\n", 87 | " return exp_scores / (np.sum(exp_scores, axis=1, keepdims=True) + 1e-8)\n", 88 | "\n", 89 | "def derivative_softmax_cross_entropy(x, y):\n", 90 | " delta = softmax(x)\n", 91 | " 
delta[range(X.shape[0]), y] -= 1\n", 92 | " return delta\n", 93 | "\n", 94 | "def forward_multiply_gate(w, x):\n", 95 | " return np.dot(w, x)\n", 96 | "\n", 97 | "def backward_multiply_gate(w, x, dz):\n", 98 | " dW = np.dot(dz.T, x)\n", 99 | " dx = np.dot(w.T, dz.T)\n", 100 | " return dW, dx\n", 101 | "\n", 102 | "def forward_add_gate(x1, x2):\n", 103 | " return x1 + x2\n", 104 | "\n", 105 | "def backward_add_gate(x1, x2, dz):\n", 106 | " dx1 = dz * np.ones_like(x1)\n", 107 | " dx2 = dz * np.ones_like(x2)\n", 108 | " return dx1, dx2\n", 109 | "\n", 110 | "def cross_entropy(Y_hat, Y, epsilon=1e-12):\n", 111 | " Y_hat = np.clip(Y_hat, epsilon, 1. - epsilon)\n", 112 | " N = Y_hat.shape[0]\n", 113 | " return -np.sum(np.sum(Y * np.log(Y_hat+1e-9))) / N\n", 114 | "\n", 115 | "def forward_recurrent(x, prev_state, U, W, V):\n", 116 | " mul_u = forward_multiply_gate(x, U.T)\n", 117 | " mul_w = forward_multiply_gate(prev_state, W.T)\n", 118 | " add_previous_now = forward_add_gate(mul_u, mul_w)\n", 119 | " current_state = tanh(add_previous_now)\n", 120 | " mul_v = forward_multiply_gate(current_state, V.T)\n", 121 | " return (mul_u, mul_w, add_previous_now, current_state, mul_v)\n", 122 | "\n", 123 | "def backward_recurrent(x, prev_state, U, W, V, d_mul_v, saved_graph):\n", 124 | " mul_u, mul_w, add_previous_now, current_state, mul_v = saved_graph\n", 125 | " dV, dcurrent_state = backward_multiply_gate(V, current_state, d_mul_v)\n", 126 | " dadd_previous_now = tanh(add_previous_now, True) * dcurrent_state.T\n", 127 | " dmul_w, dmul_u = backward_add_gate(mul_w, mul_u, dadd_previous_now)\n", 128 | " dW, dprev_state = backward_multiply_gate(W, prev_state, dmul_w)\n", 129 | " dU, dx = backward_multiply_gate(U, x, dmul_u)\n", 130 | " return (dprev_state, dU, dW, dV)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 6, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "epoch 50, loss 2.192045, accuracy 0.490885\n", 143 | "epoch 100, loss 1.801595, accuracy 0.571615\n", 144 | "epoch 150, loss 1.777153, accuracy 0.559896\n", 145 | "epoch 200, loss 1.680624, accuracy 0.583333\n", 146 | "epoch 250, loss 1.381813, accuracy 0.658854\n", 147 | "epoch 300, loss 1.309808, accuracy 0.673177\n", 148 | "epoch 350, loss 1.257366, accuracy 0.684896\n", 149 | "epoch 400, loss 1.152840, accuracy 0.718750\n", 150 | "epoch 450, loss 1.214343, accuracy 0.675781\n", 151 | "epoch 500, loss 1.187885, accuracy 0.690104\n", 152 | "epoch 550, loss 1.129081, accuracy 0.725260\n", 153 | "epoch 600, loss 1.072351, accuracy 0.718750\n", 154 | "epoch 650, loss 1.180679, accuracy 0.701823\n", 155 | "epoch 700, loss 1.060361, accuracy 0.743490\n", 156 | "epoch 750, loss 1.124037, accuracy 0.720052\n", 157 | "epoch 800, loss 1.187253, accuracy 0.712240\n", 158 | "epoch 850, loss 1.157658, accuracy 0.717448\n", 159 | "epoch 900, loss 0.906449, accuracy 0.772135\n", 160 | "epoch 950, loss 1.138607, accuracy 0.714844\n", 161 | "epoch 1000, loss 0.915181, accuracy 0.751302\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "for i in range(epoch):\n", 167 | " batch_x = np.zeros((batch_size, sequence_length, dimension))\n", 168 | " batch_y = np.zeros((batch_size, sequence_length, dimension))\n", 169 | " batch_id = random.sample(possible_batch_id, batch_size)\n", 170 | " prev_s = np.zeros((batch_size, size_layer))\n", 171 | " for n in range(sequence_length):\n", 172 | " id1 = [k + n for k in batch_id]\n", 173 | " id2 = [k + n + 1 for k in 
batch_id]\n", 174 | " batch_x[:,n,:] = onehot[id1, :]\n", 175 | " batch_y[:,n,:] = onehot[id2, :]\n", 176 | " layers = []\n", 177 | " out_logits = np.zeros((batch_size, sequence_length, dimension))\n", 178 | " for n in range(sequence_length):\n", 179 | " layers.append(forward_recurrent(batch_x[:,n,:], prev_s, U, W, V))\n", 180 | " prev_s = layers[-1][3]\n", 181 | " out_logits[:, n, :] = layers[-1][-1]\n", 182 | " probs = softmax(out_logits.reshape((-1, dimension)))\n", 183 | " y = np.argmax(batch_y.reshape((-1, dimension)),axis=1)\n", 184 | " accuracy = np.mean(np.argmax(probs,axis=1) == y)\n", 185 | " loss = cross_entropy(probs, batch_y.reshape((-1, dimension)))\n", 186 | " delta = probs\n", 187 | " delta[range(y.shape[0]), y] -= 1\n", 188 | " delta = delta.reshape((batch_size, sequence_length, dimension))\n", 189 | " dU = np.zeros(U.shape)\n", 190 | " dV = np.zeros(V.shape)\n", 191 | " dW = np.zeros(W.shape)\n", 192 | " prev_state = np.zeros((batch_size, size_layer))\n", 193 | " for n in range(sequence_length):\n", 194 | " d_mul_v = delta[:, n, :]\n", 195 | " dprev_s, dU_t, dW_t, dV_t = backward_recurrent(batch_x[:,n,:], prev_state, U, W, V, d_mul_v, layers[n])\n", 196 | " prev_state = layers[n][3]\n", 197 | " dV += dV_t\n", 198 | " dU += dU_t\n", 199 | " dW += dW_t\n", 200 | " U_g += dU ** 2\n", 201 | " U += -learning_rate * dU / np.sqrt(U_g + epsilon)\n", 202 | " V_g += dV ** 2\n", 203 | " V += -learning_rate * dV / np.sqrt(V_g + epsilon)\n", 204 | " W_g += dW ** 2\n", 205 | " W += -learning_rate * dW / np.sqrt(W_g + epsilon)\n", 206 | " if (i+1) % 50 == 0:\n", 207 | " print('epoch %d, loss %f, accuracy %f'%(i+1, loss, accuracy))" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [] 216 | } 217 | ], 218 | "metadata": { 219 | "kernelspec": { 220 | "display_name": "Python 3", 221 | "language": "python", 222 | "name": "python3" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.5.2" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 2 239 | } 240 | -------------------------------------------------------------------------------- /neural-network/vanilla-RNN/adam/vanilla-rnn-adam.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import seaborn as sns\n", 12 | "import random\n", 13 | "import time\n", 14 | "sns.set()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "def get_vocab(file, lower = False):\n", 24 | " with open(file, 'r') as fopen:\n", 25 | " data = fopen.read()\n", 26 | " if lower:\n", 27 | " data = data.lower()\n", 28 | " vocab = list(set(data))\n", 29 | " return data, vocab\n", 30 | "\n", 31 | "def embed_to_onehot(data, vocab):\n", 32 | " onehot = np.zeros((len(data), len(vocab)), dtype = np.float32)\n", 33 | " for i in range(len(data)):\n", 34 | " onehot[i, vocab.index(data[i])] = 1.0\n", 35 | " return onehot" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 
42 | "outputs": [], 43 | "source": [ 44 | "text, text_vocab = get_vocab('consumer.h', lower = False)\n", 45 | "onehot = embed_to_onehot(text, text_vocab)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "learning_rate = 1e-8\n", 55 | "batch_size = 64\n", 56 | "sequence_length = 12\n", 57 | "epoch = 1000\n", 58 | "num_layers = 2\n", 59 | "size_layer = 128\n", 60 | "possible_batch_id = range(len(text) - sequence_length - 1)\n", 61 | "dimension = onehot.shape[1]\n", 62 | "epsilon = 1e-8\n", 63 | "beta1 = 0.9\n", 64 | "beta2 = 0.999\n", 65 | "\n", 66 | "U = np.random.randn(size_layer, dimension) / np.sqrt(size_layer)\n", 67 | "U_g = np.zeros(U.shape)\n", 68 | "U_g_2 = np.zeros(U.shape)\n", 69 | "W = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 70 | "W_g = np.zeros(W.shape)\n", 71 | "W_g_2 = np.zeros(W.shape)\n", 72 | "V = np.random.randn(dimension, size_layer) / np.sqrt(dimension)\n", 73 | "V_g = np.zeros(V.shape)\n", 74 | "V_g_2 = np.zeros(V.shape)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "def tanh(x, grad=False):\n", 84 | " if grad:\n", 85 | " output = np.tanh(x)\n", 86 | " return (1.0 - np.square(output))\n", 87 | " else:\n", 88 | " return np.tanh(x)\n", 89 | " \n", 90 | "def softmax(x):\n", 91 | " exp_scores = np.exp(x - np.max(x))\n", 92 | " return exp_scores / (np.sum(exp_scores, axis=1, keepdims=True) + 1e-8)\n", 93 | "\n", 94 | "def derivative_softmax_cross_entropy(x, y):\n", 95 | " delta = softmax(x)\n", 96 | " delta[range(X.shape[0]), y] -= 1\n", 97 | " return delta\n", 98 | "\n", 99 | "def forward_multiply_gate(w, x):\n", 100 | " return np.dot(w, x)\n", 101 | "\n", 102 | "def backward_multiply_gate(w, x, dz):\n", 103 | " dW = np.dot(dz.T, x)\n", 104 | " dx = np.dot(w.T, dz.T)\n", 105 | " return dW, dx\n", 106 | "\n", 107 | "def forward_add_gate(x1, x2):\n", 108 | " return x1 + x2\n", 109 | "\n", 110 | "def backward_add_gate(x1, x2, dz):\n", 111 | " dx1 = dz * np.ones_like(x1)\n", 112 | " dx2 = dz * np.ones_like(x2)\n", 113 | " return dx1, dx2\n", 114 | "\n", 115 | "def cross_entropy(Y_hat, Y, epsilon=1e-12):\n", 116 | " Y_hat = np.clip(Y_hat, epsilon, 1. 
- epsilon)\n", 117 | " N = Y_hat.shape[0]\n", 118 | " return -np.sum(np.sum(Y * np.log(Y_hat+1e-9))) / N\n", 119 | "\n", 120 | "def forward_recurrent(x, prev_state, U, W, V):\n", 121 | " mul_u = forward_multiply_gate(x, U.T)\n", 122 | " mul_w = forward_multiply_gate(prev_state, W.T)\n", 123 | " add_previous_now = forward_add_gate(mul_u, mul_w)\n", 124 | " current_state = tanh(add_previous_now)\n", 125 | " mul_v = forward_multiply_gate(current_state, V.T)\n", 126 | " return (mul_u, mul_w, add_previous_now, current_state, mul_v)\n", 127 | "\n", 128 | "def backward_recurrent(x, prev_state, U, W, V, d_mul_v, saved_graph):\n", 129 | " mul_u, mul_w, add_previous_now, current_state, mul_v = saved_graph\n", 130 | " dV, dcurrent_state = backward_multiply_gate(V, current_state, d_mul_v)\n", 131 | " dadd_previous_now = tanh(add_previous_now, True) * dcurrent_state.T\n", 132 | " dmul_w, dmul_u = backward_add_gate(mul_w, mul_u, dadd_previous_now)\n", 133 | " dW, dprev_state = backward_multiply_gate(W, prev_state, dmul_w)\n", 134 | " dU, dx = backward_multiply_gate(U, x, dmul_u)\n", 135 | " return (dprev_state, dU, dW, dV)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 6, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "epoch 50, loss 3.820414, accuracy 0.217448\n", 148 | "epoch 100, loss 20.722266, accuracy 0.000000\n", 149 | "epoch 150, loss 20.722266, accuracy 0.001302\n", 150 | "epoch 200, loss 20.722266, accuracy 0.000000\n", 151 | "epoch 250, loss 20.722266, accuracy 0.001302\n", 152 | "epoch 300, loss 20.722266, accuracy 0.001302\n", 153 | "epoch 350, loss 20.722266, accuracy 0.001302\n", 154 | "epoch 400, loss 20.722266, accuracy 0.000000\n", 155 | "epoch 450, loss 20.722266, accuracy 0.000000\n", 156 | "epoch 500, loss 20.722266, accuracy 0.000000\n", 157 | "epoch 550, loss 20.722266, accuracy 0.000000\n", 158 | "epoch 600, loss 20.722266, accuracy 0.000000\n", 159 | "epoch 650, loss 20.722266, accuracy 0.000000\n", 160 | "epoch 700, loss 20.722266, accuracy 0.001302\n", 161 | "epoch 750, loss 20.722266, accuracy 0.000000\n", 162 | "epoch 800, loss 20.722266, accuracy 0.000000\n", 163 | "epoch 850, loss 20.722266, accuracy 0.000000\n", 164 | "epoch 900, loss 20.722266, accuracy 0.001302\n", 165 | "epoch 950, loss 20.722266, accuracy 0.000000\n", 166 | "epoch 1000, loss 20.722266, accuracy 0.000000\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "for i in range(epoch):\n", 172 | " batch_x = np.zeros((batch_size, sequence_length, dimension))\n", 173 | " batch_y = np.zeros((batch_size, sequence_length, dimension))\n", 174 | " batch_id = random.sample(possible_batch_id, batch_size)\n", 175 | " prev_s = np.zeros((batch_size, size_layer))\n", 176 | " for n in range(sequence_length):\n", 177 | " id1 = [k + n for k in batch_id]\n", 178 | " id2 = [k + n + 1 for k in batch_id]\n", 179 | " batch_x[:,n,:] = onehot[id1, :]\n", 180 | " batch_y[:,n,:] = onehot[id2, :]\n", 181 | " layers = []\n", 182 | " out_logits = np.zeros((batch_size, sequence_length, dimension))\n", 183 | " for n in range(sequence_length):\n", 184 | " layers.append(forward_recurrent(batch_x[:,n,:], prev_s, U, W, V))\n", 185 | " prev_s = layers[-1][3]\n", 186 | " out_logits[:, n, :] = layers[-1][-1]\n", 187 | " probs = softmax(out_logits.reshape((-1, dimension)))\n", 188 | " y = np.argmax(batch_y.reshape((-1, dimension)),axis=1)\n", 189 | " accuracy = np.mean(np.argmax(probs,axis=1) == y)\n", 190 | " loss = cross_entropy(probs, 
batch_y.reshape((-1, dimension)))\n", 191 | " delta = probs\n", 192 | " delta[range(y.shape[0]), y] -= 1\n", 193 | " delta = delta.reshape((batch_size, sequence_length, dimension))\n", 194 | " dU = np.zeros(U.shape)\n", 195 | " dV = np.zeros(V.shape)\n", 196 | " dW = np.zeros(W.shape)\n", 197 | " prev_state = np.zeros((batch_size, size_layer))\n", 198 | " for n in range(sequence_length):\n", 199 | " d_mul_v = delta[:, n, :]\n", 200 | " dprev_s, dU_t, dW_t, dV_t = backward_recurrent(batch_x[:,n,:], prev_state, U, W, V, d_mul_v, layers[n])\n", 201 | " prev_state = layers[n][3]\n", 202 | " dV += dV_t\n", 203 | " dU += dU_t\n", 204 | " dW += dW_t\n", 205 | " U_g += beta1 * U_g + (1-beta1) * dU\n", 206 | " g_hat = U_g / (1-beta1)\n", 207 | " U_g_2 += beta2 * U_g_2 + (1-beta2) * np.square(dU)\n", 208 | " g2_hat = U_g_2 / (1-beta2)\n", 209 | " U += -learning_rate * g_hat / np.sqrt(g2_hat + epsilon)\n", 210 | " \n", 211 | " V_g += beta1 * V_g + (1-beta1) * dV\n", 212 | " g_hat = V_g / (1-beta1)\n", 213 | " V_g_2 += beta2 * V_g_2 + (1-beta2) * np.square(dV)\n", 214 | " g2_hat = V_g_2 / (1-beta2)\n", 215 | " V += -learning_rate * g_hat / np.sqrt(g2_hat + epsilon)\n", 216 | " \n", 217 | " W_g += beta1 * W_g + (1-beta1) * dW\n", 218 | " g_hat = W_g / (1-beta1)\n", 219 | " W_g_2 += beta2 * W_g_2 + (1-beta2) * np.square(dW)\n", 220 | " g2_hat = W_g_2 / (1-beta2)\n", 221 | " W += -learning_rate * g_hat / np.sqrt(g2_hat + epsilon)\n", 222 | " if (i+1) % 50 == 0:\n", 223 | " print('epoch %d, loss %f, accuracy %f'%(i+1, loss, accuracy))" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [] 232 | } 233 | ], 234 | "metadata": { 235 | "kernelspec": { 236 | "display_name": "Python 3", 237 | "language": "python", 238 | "name": "python3" 239 | }, 240 | "language_info": { 241 | "codemirror_mode": { 242 | "name": "ipython", 243 | "version": 3 244 | }, 245 | "file_extension": ".py", 246 | "mimetype": "text/x-python", 247 | "name": "python", 248 | "nbconvert_exporter": "python", 249 | "pygments_lexer": "ipython3", 250 | "version": "3.5.2" 251 | } 252 | }, 253 | "nbformat": 4, 254 | "nbformat_minor": 2 255 | } 256 | -------------------------------------------------------------------------------- /neural-network/vanilla-RNN/gradient-descent/vanilla-rnn-gradient-descent.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import seaborn as sns\n", 12 | "import random\n", 13 | "import time\n", 14 | "sns.set()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "def get_vocab(file, lower = False):\n", 24 | " with open(file, 'r') as fopen:\n", 25 | " data = fopen.read()\n", 26 | " if lower:\n", 27 | " data = data.lower()\n", 28 | " vocab = list(set(data))\n", 29 | " return data, vocab\n", 30 | "\n", 31 | "def embed_to_onehot(data, vocab):\n", 32 | " onehot = np.zeros((len(data), len(vocab)), dtype = np.float32)\n", 33 | " for i in range(len(data)):\n", 34 | " onehot[i, vocab.index(data[i])] = 1.0\n", 35 | " return onehot" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "text, text_vocab = get_vocab('consumer.h', lower = 
False)\n", 45 | "onehot = embed_to_onehot(text, text_vocab)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "learning_rate = 0.0001\n", 55 | "batch_size = 64\n", 56 | "sequence_length = 12\n", 57 | "epoch = 1000\n", 58 | "num_layers = 2\n", 59 | "size_layer = 128\n", 60 | "possible_batch_id = range(len(text) - sequence_length - 1)\n", 61 | "dimension = onehot.shape[1]\n", 62 | "\n", 63 | "U = np.random.randn(size_layer, dimension) / np.sqrt(size_layer)\n", 64 | "W = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 65 | "V = np.random.randn(dimension, size_layer) / np.sqrt(dimension)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 7, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "def tanh(x, grad=False):\n", 75 | " if grad:\n", 76 | " output = np.tanh(x)\n", 77 | " return (1.0 - np.square(output))\n", 78 | " else:\n", 79 | " return np.tanh(x)\n", 80 | " \n", 81 | "def softmax(x):\n", 82 | " exp_scores = np.exp(x - np.max(x))\n", 83 | " return exp_scores / (np.sum(exp_scores, axis=1, keepdims=True) + 1e-8)\n", 84 | "\n", 85 | "def derivative_softmax_cross_entropy(x, y):\n", 86 | " delta = softmax(x)\n", 87 | " delta[range(X.shape[0]), y] -= 1\n", 88 | " return delta\n", 89 | "\n", 90 | "def forward_multiply_gate(w, x):\n", 91 | " return np.dot(w, x)\n", 92 | "\n", 93 | "def backward_multiply_gate(w, x, dz):\n", 94 | " dW = np.dot(dz.T, x)\n", 95 | " dx = np.dot(w.T, dz.T)\n", 96 | " return dW, dx\n", 97 | "\n", 98 | "def forward_add_gate(x1, x2):\n", 99 | " return x1 + x2\n", 100 | "\n", 101 | "def backward_add_gate(x1, x2, dz):\n", 102 | " dx1 = dz * np.ones_like(x1)\n", 103 | " dx2 = dz * np.ones_like(x2)\n", 104 | " return dx1, dx2\n", 105 | "\n", 106 | "def cross_entropy(Y_hat, Y, epsilon=1e-12):\n", 107 | " Y_hat = np.clip(Y_hat, epsilon, 1. 
- epsilon)\n", 108 | " N = Y_hat.shape[0]\n", 109 | " return -np.sum(np.sum(Y * np.log(Y_hat+1e-9))) / N\n", 110 | "\n", 111 | "def forward_recurrent(x, prev_state, U, W, V):\n", 112 | " mul_u = forward_multiply_gate(x, U.T)\n", 113 | " mul_w = forward_multiply_gate(prev_state, W.T)\n", 114 | " add_previous_now = forward_add_gate(mul_u, mul_w)\n", 115 | " current_state = tanh(add_previous_now)\n", 116 | " mul_v = forward_multiply_gate(current_state, V.T)\n", 117 | " return (mul_u, mul_w, add_previous_now, current_state, mul_v)\n", 118 | "\n", 119 | "def backward_recurrent(x, prev_state, U, W, V, d_mul_v, saved_graph):\n", 120 | " mul_u, mul_w, add_previous_now, current_state, mul_v = saved_graph\n", 121 | " dV, dcurrent_state = backward_multiply_gate(V, current_state, d_mul_v)\n", 122 | " dadd_previous_now = tanh(add_previous_now, True) * dcurrent_state.T\n", 123 | " dmul_w, dmul_u = backward_add_gate(mul_w, mul_u, dadd_previous_now)\n", 124 | " dW, dprev_state = backward_multiply_gate(W, prev_state, dmul_w)\n", 125 | " dU, dx = backward_multiply_gate(U, x, dmul_u)\n", 126 | " return (dprev_state, dU, dW, dV)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 8, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "epoch 50, loss 4.175321, accuracy 0.078125\n", 139 | "epoch 100, loss 3.883469, accuracy 0.158854\n", 140 | "epoch 150, loss 3.507442, accuracy 0.214844\n", 141 | "epoch 200, loss 3.388112, accuracy 0.273438\n", 142 | "epoch 250, loss 3.327744, accuracy 0.281250\n", 143 | "epoch 300, loss 3.042421, accuracy 0.315104\n", 144 | "epoch 350, loss 2.942040, accuracy 0.334635\n", 145 | "epoch 400, loss 3.002462, accuracy 0.294271\n", 146 | "epoch 450, loss 2.517064, accuracy 0.432292\n", 147 | "epoch 500, loss 2.653959, accuracy 0.401042\n", 148 | "epoch 550, loss 2.719194, accuracy 0.356771\n", 149 | "epoch 600, loss 2.386390, accuracy 0.444010\n", 150 | "epoch 650, loss 2.535589, accuracy 0.406250\n", 151 | "epoch 700, loss 2.578567, accuracy 0.399740\n", 152 | "epoch 750, loss 2.221211, accuracy 0.505208\n", 153 | "epoch 800, loss 2.231152, accuracy 0.490885\n", 154 | "epoch 850, loss 2.168349, accuracy 0.476562\n", 155 | "epoch 900, loss 2.021851, accuracy 0.532552\n", 156 | "epoch 950, loss 2.022622, accuracy 0.522135\n", 157 | "epoch 1000, loss 2.160469, accuracy 0.489583\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "for i in range(epoch):\n", 163 | " batch_x = np.zeros((batch_size, sequence_length, dimension))\n", 164 | " batch_y = np.zeros((batch_size, sequence_length, dimension))\n", 165 | " batch_id = random.sample(possible_batch_id, batch_size)\n", 166 | " prev_s = np.zeros((batch_size, size_layer))\n", 167 | " for n in range(sequence_length):\n", 168 | " id1 = [k + n for k in batch_id]\n", 169 | " id2 = [k + n + 1 for k in batch_id]\n", 170 | " batch_x[:,n,:] = onehot[id1, :]\n", 171 | " batch_y[:,n,:] = onehot[id2, :]\n", 172 | " layers = []\n", 173 | " out_logits = np.zeros((batch_size, sequence_length, dimension))\n", 174 | " for n in range(sequence_length):\n", 175 | " layers.append(forward_recurrent(batch_x[:,n,:], prev_s, U, W, V))\n", 176 | " prev_s = layers[-1][3]\n", 177 | " out_logits[:, n, :] = layers[-1][-1]\n", 178 | " probs = softmax(out_logits.reshape((-1, dimension)))\n", 179 | " y = np.argmax(batch_y.reshape((-1, dimension)),axis=1)\n", 180 | " accuracy = np.mean(np.argmax(probs,axis=1) == y)\n", 181 | " loss = cross_entropy(probs, 
batch_y.reshape((-1, dimension)))\n", 182 | " delta = probs\n", 183 | " delta[range(y.shape[0]), y] -= 1\n", 184 | " delta = delta.reshape((batch_size, sequence_length, dimension))\n", 185 | " dU = np.zeros(U.shape)\n", 186 | " dV = np.zeros(V.shape)\n", 187 | " dW = np.zeros(W.shape)\n", 188 | " prev_state = np.zeros((batch_size, size_layer))\n", 189 | " for n in range(sequence_length):\n", 190 | " d_mul_v = delta[:, n, :]\n", 191 | " dprev_s, dU_t, dW_t, dV_t = backward_recurrent(batch_x[:,n,:], prev_state, U, W, V, d_mul_v, layers[n])\n", 192 | " prev_state = layers[n][3]\n", 193 | " dV += dV_t\n", 194 | " dU += dU_t\n", 195 | " dW += dW_t\n", 196 | " U -= learning_rate * dU\n", 197 | " V -= learning_rate * dV\n", 198 | " W -= learning_rate * dW\n", 199 | " if (i+1) % 50 == 0:\n", 200 | " print('epoch %d, loss %f, accuracy %f'%(i+1, loss, accuracy))" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [] 209 | } 210 | ], 211 | "metadata": { 212 | "kernelspec": { 213 | "display_name": "Python 3", 214 | "language": "python", 215 | "name": "python3" 216 | }, 217 | "language_info": { 218 | "codemirror_mode": { 219 | "name": "ipython", 220 | "version": 3 221 | }, 222 | "file_extension": ".py", 223 | "mimetype": "text/x-python", 224 | "name": "python", 225 | "nbconvert_exporter": "python", 226 | "pygments_lexer": "ipython3", 227 | "version": "3.5.2" 228 | } 229 | }, 230 | "nbformat": 4, 231 | "nbformat_minor": 2 232 | } 233 | -------------------------------------------------------------------------------- /neural-network/vanilla-RNN/momentum/vanilla-rnn-momentum.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import seaborn as sns\n", 12 | "import random\n", 13 | "import time\n", 14 | "sns.set()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "def get_vocab(file, lower = False):\n", 24 | " with open(file, 'r') as fopen:\n", 25 | " data = fopen.read()\n", 26 | " if lower:\n", 27 | " data = data.lower()\n", 28 | " vocab = list(set(data))\n", 29 | " return data, vocab\n", 30 | "\n", 31 | "def embed_to_onehot(data, vocab):\n", 32 | " onehot = np.zeros((len(data), len(vocab)), dtype = np.float32)\n", 33 | " for i in range(len(data)):\n", 34 | " onehot[i, vocab.index(data[i])] = 1.0\n", 35 | " return onehot" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "text, text_vocab = get_vocab('consumer.h', lower = False)\n", 45 | "onehot = embed_to_onehot(text, text_vocab)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "learning_rate = 0.0001\n", 55 | "batch_size = 64\n", 56 | "sequence_length = 12\n", 57 | "epoch = 1000\n", 58 | "num_layers = 2\n", 59 | "size_layer = 128\n", 60 | "possible_batch_id = range(len(text) - sequence_length - 1)\n", 61 | "dimension = onehot.shape[1]\n", 62 | "momentum = 0.9\n", 63 | "\n", 64 | "U = np.random.randn(size_layer, dimension) / np.sqrt(size_layer)\n", 65 | "U_velocity = np.zeros(U.shape)\n", 66 | "W = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 
67 | "W_velocity = np.zeros(W.shape)\n", 68 | "V = np.random.randn(dimension, size_layer) / np.sqrt(dimension)\n", 69 | "V_velocity = np.zeros(V.shape)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "def tanh(x, grad=False):\n", 79 | " if grad:\n", 80 | " output = np.tanh(x)\n", 81 | " return (1.0 - np.square(output))\n", 82 | " else:\n", 83 | " return np.tanh(x)\n", 84 | " \n", 85 | "def softmax(x):\n", 86 | " exp_scores = np.exp(x - np.max(x))\n", 87 | " return exp_scores / (np.sum(exp_scores, axis=1, keepdims=True) + 1e-8)\n", 88 | "\n", 89 | "def derivative_softmax_cross_entropy(x, y):\n", 90 | " delta = softmax(x)\n", 91 | " delta[range(X.shape[0]), y] -= 1\n", 92 | " return delta\n", 93 | "\n", 94 | "def forward_multiply_gate(w, x):\n", 95 | " return np.dot(w, x)\n", 96 | "\n", 97 | "def backward_multiply_gate(w, x, dz):\n", 98 | " dW = np.dot(dz.T, x)\n", 99 | " dx = np.dot(w.T, dz.T)\n", 100 | " return dW, dx\n", 101 | "\n", 102 | "def forward_add_gate(x1, x2):\n", 103 | " return x1 + x2\n", 104 | "\n", 105 | "def backward_add_gate(x1, x2, dz):\n", 106 | " dx1 = dz * np.ones_like(x1)\n", 107 | " dx2 = dz * np.ones_like(x2)\n", 108 | " return dx1, dx2\n", 109 | "\n", 110 | "def cross_entropy(Y_hat, Y, epsilon=1e-12):\n", 111 | " Y_hat = np.clip(Y_hat, epsilon, 1. - epsilon)\n", 112 | " N = Y_hat.shape[0]\n", 113 | " return -np.sum(np.sum(Y * np.log(Y_hat+1e-9))) / N\n", 114 | "\n", 115 | "def forward_recurrent(x, prev_state, U, W, V):\n", 116 | " mul_u = forward_multiply_gate(x, U.T)\n", 117 | " mul_w = forward_multiply_gate(prev_state, W.T)\n", 118 | " add_previous_now = forward_add_gate(mul_u, mul_w)\n", 119 | " current_state = tanh(add_previous_now)\n", 120 | " mul_v = forward_multiply_gate(current_state, V.T)\n", 121 | " return (mul_u, mul_w, add_previous_now, current_state, mul_v)\n", 122 | "\n", 123 | "def backward_recurrent(x, prev_state, U, W, V, d_mul_v, saved_graph):\n", 124 | " mul_u, mul_w, add_previous_now, current_state, mul_v = saved_graph\n", 125 | " dV, dcurrent_state = backward_multiply_gate(V, current_state, d_mul_v)\n", 126 | " dadd_previous_now = tanh(add_previous_now, True) * dcurrent_state.T\n", 127 | " dmul_w, dmul_u = backward_add_gate(mul_w, mul_u, dadd_previous_now)\n", 128 | " dW, dprev_state = backward_multiply_gate(W, prev_state, dmul_w)\n", 129 | " dU, dx = backward_multiply_gate(U, x, dmul_u)\n", 130 | " return (dprev_state, dU, dW, dV)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 6, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "epoch 50, loss 2.951304, accuracy 0.330729\n", 143 | "epoch 100, loss 2.396142, accuracy 0.429688\n", 144 | "epoch 150, loss 1.997829, accuracy 0.503906\n", 145 | "epoch 200, loss 1.597059, accuracy 0.632812\n", 146 | "epoch 250, loss 1.471414, accuracy 0.634115\n", 147 | "epoch 300, loss 1.261236, accuracy 0.703125\n", 148 | "epoch 350, loss 1.414094, accuracy 0.656250\n", 149 | "epoch 400, loss 1.150760, accuracy 0.697917\n", 150 | "epoch 450, loss 1.349550, accuracy 0.667969\n", 151 | "epoch 500, loss 1.144613, accuracy 0.721354\n", 152 | "epoch 550, loss 1.140830, accuracy 0.718750\n", 153 | "epoch 600, loss 1.223690, accuracy 0.694010\n", 154 | "epoch 650, loss 1.196183, accuracy 0.683594\n", 155 | "epoch 700, loss 1.091972, accuracy 0.716146\n", 156 | "epoch 750, loss 0.981222, accuracy 0.738281\n", 157 | "epoch 800, 
loss 1.030206, accuracy 0.738281\n", 158 | "epoch 850, loss 0.853434, accuracy 0.787760\n", 159 | "epoch 900, loss 0.988047, accuracy 0.736979\n", 160 | "epoch 950, loss 1.073020, accuracy 0.729167\n", 161 | "epoch 1000, loss 0.819446, accuracy 0.774740\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "for i in range(epoch):\n", 167 | " batch_x = np.zeros((batch_size, sequence_length, dimension))\n", 168 | " batch_y = np.zeros((batch_size, sequence_length, dimension))\n", 169 | " batch_id = random.sample(possible_batch_id, batch_size)\n", 170 | " prev_s = np.zeros((batch_size, size_layer))\n", 171 | " for n in range(sequence_length):\n", 172 | " id1 = [k + n for k in batch_id]\n", 173 | " id2 = [k + n + 1 for k in batch_id]\n", 174 | " batch_x[:,n,:] = onehot[id1, :]\n", 175 | " batch_y[:,n,:] = onehot[id2, :]\n", 176 | " layers = []\n", 177 | " out_logits = np.zeros((batch_size, sequence_length, dimension))\n", 178 | " for n in range(sequence_length):\n", 179 | " layers.append(forward_recurrent(batch_x[:,n,:], prev_s, U, W, V))\n", 180 | " prev_s = layers[-1][3]\n", 181 | " out_logits[:, n, :] = layers[-1][-1]\n", 182 | " probs = softmax(out_logits.reshape((-1, dimension)))\n", 183 | " y = np.argmax(batch_y.reshape((-1, dimension)),axis=1)\n", 184 | " accuracy = np.mean(np.argmax(probs,axis=1) == y)\n", 185 | " loss = cross_entropy(probs, batch_y.reshape((-1, dimension)))\n", 186 | " delta = probs\n", 187 | " delta[range(y.shape[0]), y] -= 1\n", 188 | " delta = delta.reshape((batch_size, sequence_length, dimension))\n", 189 | " dU = np.zeros(U.shape)\n", 190 | " dV = np.zeros(V.shape)\n", 191 | " dW = np.zeros(W.shape)\n", 192 | " prev_state = np.zeros((batch_size, size_layer))\n", 193 | " for n in range(sequence_length):\n", 194 | " d_mul_v = delta[:, n, :]\n", 195 | " dprev_s, dU_t, dW_t, dV_t = backward_recurrent(batch_x[:,n,:], prev_state, U, W, V, d_mul_v, layers[n])\n", 196 | " prev_state = layers[n][3]\n", 197 | " dV += dV_t\n", 198 | " dU += dU_t\n", 199 | " dW += dW_t\n", 200 | " U_velocity = U_velocity * momentum + learning_rate * dU\n", 201 | " U -= U_velocity\n", 202 | " V_velocity = V_velocity * momentum + learning_rate * dV\n", 203 | " V -= V_velocity\n", 204 | " W_velocity = W_velocity * momentum + learning_rate * dW\n", 205 | " W -= W_velocity\n", 206 | " if (i+1) % 50 == 0:\n", 207 | " print('epoch %d, loss %f, accuracy %f'%(i+1, loss, accuracy))" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [] 216 | } 217 | ], 218 | "metadata": { 219 | "kernelspec": { 220 | "display_name": "Python 3", 221 | "language": "python", 222 | "name": "python3" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.5.2" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 2 239 | } 240 | -------------------------------------------------------------------------------- /neural-network/vanilla-RNN/nesterov/vanilla-rnn-nesterov.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import seaborn as sns\n", 12 | 
"import random\n", 13 | "import time\n", 14 | "sns.set()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "def get_vocab(file, lower = False):\n", 24 | " with open(file, 'r') as fopen:\n", 25 | " data = fopen.read()\n", 26 | " if lower:\n", 27 | " data = data.lower()\n", 28 | " vocab = list(set(data))\n", 29 | " return data, vocab\n", 30 | "\n", 31 | "def embed_to_onehot(data, vocab):\n", 32 | " onehot = np.zeros((len(data), len(vocab)), dtype = np.float32)\n", 33 | " for i in range(len(data)):\n", 34 | " onehot[i, vocab.index(data[i])] = 1.0\n", 35 | " return onehot" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "text, text_vocab = get_vocab('consumer.h', lower = False)\n", 45 | "onehot = embed_to_onehot(text, text_vocab)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "learning_rate = 0.0001\n", 55 | "batch_size = 64\n", 56 | "sequence_length = 12\n", 57 | "epoch = 1000\n", 58 | "num_layers = 2\n", 59 | "size_layer = 128\n", 60 | "possible_batch_id = range(len(text) - sequence_length - 1)\n", 61 | "dimension = onehot.shape[1]\n", 62 | "momentum = 0.9\n", 63 | "\n", 64 | "U = np.random.randn(size_layer, dimension) / np.sqrt(size_layer)\n", 65 | "U_velocity = np.zeros(U.shape)\n", 66 | "W = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 67 | "W_velocity = np.zeros(W.shape)\n", 68 | "V = np.random.randn(dimension, size_layer) / np.sqrt(dimension)\n", 69 | "V_velocity = np.zeros(V.shape)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "def tanh(x, grad=False):\n", 79 | " if grad:\n", 80 | " output = np.tanh(x)\n", 81 | " return (1.0 - np.square(output))\n", 82 | " else:\n", 83 | " return np.tanh(x)\n", 84 | " \n", 85 | "def softmax(x):\n", 86 | " exp_scores = np.exp(x - np.max(x))\n", 87 | " return exp_scores / (np.sum(exp_scores, axis=1, keepdims=True) + 1e-8)\n", 88 | "\n", 89 | "def derivative_softmax_cross_entropy(x, y):\n", 90 | " delta = softmax(x)\n", 91 | " delta[range(X.shape[0]), y] -= 1\n", 92 | " return delta\n", 93 | "\n", 94 | "def forward_multiply_gate(w, x):\n", 95 | " return np.dot(w, x)\n", 96 | "\n", 97 | "def backward_multiply_gate(w, x, dz):\n", 98 | " dW = np.dot(dz.T, x)\n", 99 | " dx = np.dot(w.T, dz.T)\n", 100 | " return dW, dx\n", 101 | "\n", 102 | "def forward_add_gate(x1, x2):\n", 103 | " return x1 + x2\n", 104 | "\n", 105 | "def backward_add_gate(x1, x2, dz):\n", 106 | " dx1 = dz * np.ones_like(x1)\n", 107 | " dx2 = dz * np.ones_like(x2)\n", 108 | " return dx1, dx2\n", 109 | "\n", 110 | "def cross_entropy(Y_hat, Y, epsilon=1e-12):\n", 111 | " Y_hat = np.clip(Y_hat, epsilon, 1. 
- epsilon)\n", 112 | " N = Y_hat.shape[0]\n", 113 | " return -np.sum(np.sum(Y * np.log(Y_hat+1e-9))) / N\n", 114 | "\n", 115 | "def forward_recurrent(x, prev_state, U, W, V):\n", 116 | " mul_u = forward_multiply_gate(x, U.T)\n", 117 | " mul_w = forward_multiply_gate(prev_state, W.T)\n", 118 | " add_previous_now = forward_add_gate(mul_u, mul_w)\n", 119 | " current_state = tanh(add_previous_now)\n", 120 | " mul_v = forward_multiply_gate(current_state, V.T)\n", 121 | " return (mul_u, mul_w, add_previous_now, current_state, mul_v)\n", 122 | "\n", 123 | "def backward_recurrent(x, prev_state, U, W, V, d_mul_v, saved_graph):\n", 124 | " mul_u, mul_w, add_previous_now, current_state, mul_v = saved_graph\n", 125 | " dV, dcurrent_state = backward_multiply_gate(V, current_state, d_mul_v)\n", 126 | " dadd_previous_now = tanh(add_previous_now, True) * dcurrent_state.T\n", 127 | " dmul_w, dmul_u = backward_add_gate(mul_w, mul_u, dadd_previous_now)\n", 128 | " dW, dprev_state = backward_multiply_gate(W, prev_state, dmul_w)\n", 129 | " dU, dx = backward_multiply_gate(U, x, dmul_u)\n", 130 | " return (dprev_state, dU, dW, dV)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 6, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "epoch 50, loss 2.765782, accuracy 0.348958\n", 143 | "epoch 100, loss 2.172428, accuracy 0.481771\n", 144 | "epoch 150, loss 1.941215, accuracy 0.544271\n", 145 | "epoch 200, loss 1.565606, accuracy 0.614583\n", 146 | "epoch 250, loss 1.502710, accuracy 0.635417\n", 147 | "epoch 300, loss 1.380162, accuracy 0.656250\n", 148 | "epoch 350, loss 1.274877, accuracy 0.687500\n", 149 | "epoch 400, loss 1.421969, accuracy 0.645833\n", 150 | "epoch 450, loss 1.108888, accuracy 0.723958\n", 151 | "epoch 500, loss 1.148727, accuracy 0.714844\n", 152 | "epoch 550, loss 1.031485, accuracy 0.734375\n", 153 | "epoch 600, loss 1.256077, accuracy 0.673177\n", 154 | "epoch 650, loss 1.101010, accuracy 0.717448\n", 155 | "epoch 700, loss 1.027899, accuracy 0.726562\n", 156 | "epoch 750, loss 1.188729, accuracy 0.670573\n", 157 | "epoch 800, loss 0.728075, accuracy 0.812500\n", 158 | "epoch 850, loss 0.912237, accuracy 0.753906\n", 159 | "epoch 900, loss 0.806098, accuracy 0.779948\n", 160 | "epoch 950, loss 0.909736, accuracy 0.760417\n", 161 | "epoch 1000, loss 0.875729, accuracy 0.766927\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "for i in range(epoch):\n", 167 | " batch_x = np.zeros((batch_size, sequence_length, dimension))\n", 168 | " batch_y = np.zeros((batch_size, sequence_length, dimension))\n", 169 | " batch_id = random.sample(possible_batch_id, batch_size)\n", 170 | " prev_s = np.zeros((batch_size, size_layer))\n", 171 | " for n in range(sequence_length):\n", 172 | " id1 = [k + n for k in batch_id]\n", 173 | " id2 = [k + n + 1 for k in batch_id]\n", 174 | " batch_x[:,n,:] = onehot[id1, :]\n", 175 | " batch_y[:,n,:] = onehot[id2, :]\n", 176 | " layers = []\n", 177 | " out_logits = np.zeros((batch_size, sequence_length, dimension))\n", 178 | " for n in range(sequence_length):\n", 179 | " layers.append(forward_recurrent(batch_x[:,n,:], prev_s, U - momentum * U_velocity, \n", 180 | " W - momentum * W_velocity, V - momentum * V_velocity))\n", 181 | " prev_s = layers[-1][3]\n", 182 | " out_logits[:, n, :] = layers[-1][-1]\n", 183 | " probs = softmax(out_logits.reshape((-1, dimension)))\n", 184 | " y = np.argmax(batch_y.reshape((-1, dimension)),axis=1)\n", 185 | " accuracy = 
np.mean(np.argmax(probs,axis=1) == y)\n", 186 | " loss = cross_entropy(probs, batch_y.reshape((-1, dimension)))\n", 187 | " delta = probs\n", 188 | " delta[range(y.shape[0]), y] -= 1\n", 189 | " delta = delta.reshape((batch_size, sequence_length, dimension))\n", 190 | " dU = np.zeros(U.shape)\n", 191 | " dV = np.zeros(V.shape)\n", 192 | " dW = np.zeros(W.shape)\n", 193 | " prev_state = np.zeros((batch_size, size_layer))\n", 194 | " for n in range(sequence_length):\n", 195 | " d_mul_v = delta[:, n, :]\n", 196 | " dprev_s, dU_t, dW_t, dV_t = backward_recurrent(batch_x[:,n,:], prev_state, U, W, V, d_mul_v, layers[n])\n", 197 | " prev_state = layers[n][3]\n", 198 | " dV += dV_t\n", 199 | " dU += dU_t\n", 200 | " dW += dW_t\n", 201 | " U_velocity = U_velocity * momentum + learning_rate * dU\n", 202 | " U -= U_velocity\n", 203 | " V_velocity = V_velocity * momentum + learning_rate * dV\n", 204 | " V -= V_velocity\n", 205 | " W_velocity = W_velocity * momentum + learning_rate * dW\n", 206 | " W -= W_velocity\n", 207 | " if (i+1) % 50 == 0:\n", 208 | " print('epoch %d, loss %f, accuracy %f'%(i+1, loss, accuracy))" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [] 217 | } 218 | ], 219 | "metadata": { 220 | "kernelspec": { 221 | "display_name": "Python 3", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.5.2" 236 | } 237 | }, 238 | "nbformat": 4, 239 | "nbformat_minor": 2 240 | } 241 | -------------------------------------------------------------------------------- /neural-network/vanilla-RNN/rmsprop/vanilla-rnn-rmsprop.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import seaborn as sns\n", 12 | "import random\n", 13 | "import time\n", 14 | "sns.set()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "def get_vocab(file, lower = False):\n", 24 | " with open(file, 'r') as fopen:\n", 25 | " data = fopen.read()\n", 26 | " if lower:\n", 27 | " data = data.lower()\n", 28 | " vocab = list(set(data))\n", 29 | " return data, vocab\n", 30 | "\n", 31 | "def embed_to_onehot(data, vocab):\n", 32 | " onehot = np.zeros((len(data), len(vocab)), dtype = np.float32)\n", 33 | " for i in range(len(data)):\n", 34 | " onehot[i, vocab.index(data[i])] = 1.0\n", 35 | " return onehot" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "text, text_vocab = get_vocab('consumer.h', lower = False)\n", 45 | "onehot = embed_to_onehot(text, text_vocab)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "learning_rate = 0.01\n", 55 | "batch_size = 64\n", 56 | "sequence_length = 12\n", 57 | "epoch = 1000\n", 58 | "num_layers = 2\n", 59 | "size_layer = 128\n", 60 | "possible_batch_id = range(len(text) - sequence_length - 1)\n", 61 | 
"dimension = onehot.shape[1]\n", 62 | "epsilon = 1e-8\n", 63 | "rho = 0.9\n", 64 | "\n", 65 | "U = np.random.randn(size_layer, dimension) / np.sqrt(size_layer)\n", 66 | "U_g = np.zeros(U.shape)\n", 67 | "W = np.random.randn(size_layer, size_layer) / np.sqrt(size_layer)\n", 68 | "W_g = np.zeros(W.shape)\n", 69 | "V = np.random.randn(dimension, size_layer) / np.sqrt(dimension)\n", 70 | "V_g = np.zeros(V.shape)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 5, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "def tanh(x, grad=False):\n", 80 | " if grad:\n", 81 | " output = np.tanh(x)\n", 82 | " return (1.0 - np.square(output))\n", 83 | " else:\n", 84 | " return np.tanh(x)\n", 85 | " \n", 86 | "def softmax(x):\n", 87 | " exp_scores = np.exp(x - np.max(x))\n", 88 | " return exp_scores / (np.sum(exp_scores, axis=1, keepdims=True) + 1e-8)\n", 89 | "\n", 90 | "def derivative_softmax_cross_entropy(x, y):\n", 91 | " delta = softmax(x)\n", 92 | " delta[range(X.shape[0]), y] -= 1\n", 93 | " return delta\n", 94 | "\n", 95 | "def forward_multiply_gate(w, x):\n", 96 | " return np.dot(w, x)\n", 97 | "\n", 98 | "def backward_multiply_gate(w, x, dz):\n", 99 | " dW = np.dot(dz.T, x)\n", 100 | " dx = np.dot(w.T, dz.T)\n", 101 | " return dW, dx\n", 102 | "\n", 103 | "def forward_add_gate(x1, x2):\n", 104 | " return x1 + x2\n", 105 | "\n", 106 | "def backward_add_gate(x1, x2, dz):\n", 107 | " dx1 = dz * np.ones_like(x1)\n", 108 | " dx2 = dz * np.ones_like(x2)\n", 109 | " return dx1, dx2\n", 110 | "\n", 111 | "def cross_entropy(Y_hat, Y, epsilon=1e-12):\n", 112 | " Y_hat = np.clip(Y_hat, epsilon, 1. - epsilon)\n", 113 | " N = Y_hat.shape[0]\n", 114 | " return -np.sum(np.sum(Y * np.log(Y_hat+1e-9))) / N\n", 115 | "\n", 116 | "def forward_recurrent(x, prev_state, U, W, V):\n", 117 | " mul_u = forward_multiply_gate(x, U.T)\n", 118 | " mul_w = forward_multiply_gate(prev_state, W.T)\n", 119 | " add_previous_now = forward_add_gate(mul_u, mul_w)\n", 120 | " current_state = tanh(add_previous_now)\n", 121 | " mul_v = forward_multiply_gate(current_state, V.T)\n", 122 | " return (mul_u, mul_w, add_previous_now, current_state, mul_v)\n", 123 | "\n", 124 | "def backward_recurrent(x, prev_state, U, W, V, d_mul_v, saved_graph):\n", 125 | " mul_u, mul_w, add_previous_now, current_state, mul_v = saved_graph\n", 126 | " dV, dcurrent_state = backward_multiply_gate(V, current_state, d_mul_v)\n", 127 | " dadd_previous_now = tanh(add_previous_now, True) * dcurrent_state.T\n", 128 | " dmul_w, dmul_u = backward_add_gate(mul_w, mul_u, dadd_previous_now)\n", 129 | " dW, dprev_state = backward_multiply_gate(W, prev_state, dmul_w)\n", 130 | " dU, dx = backward_multiply_gate(U, x, dmul_u)\n", 131 | " return (dprev_state, dU, dW, dV)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 6, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "epoch 50, loss 2.934785, accuracy 0.325521\n", 144 | "epoch 100, loss 2.956455, accuracy 0.296875\n", 145 | "epoch 150, loss 2.789464, accuracy 0.373698\n", 146 | "epoch 200, loss 2.934725, accuracy 0.325521\n", 147 | "epoch 250, loss 2.971794, accuracy 0.287760\n", 148 | "epoch 300, loss 2.973886, accuracy 0.286458\n", 149 | "epoch 350, loss 2.970545, accuracy 0.307292\n", 150 | "epoch 400, loss 3.014414, accuracy 0.292969\n", 151 | "epoch 450, loss 3.107468, accuracy 0.282552\n", 152 | "epoch 500, loss 3.003698, accuracy 0.328125\n", 153 | "epoch 550, loss 2.974417, 
accuracy 0.335938\n", 154 | "epoch 600, loss 2.945763, accuracy 0.294271\n", 155 | "epoch 650, loss 2.971616, accuracy 0.294271\n", 156 | "epoch 700, loss 3.105809, accuracy 0.319010\n", 157 | "epoch 750, loss 3.033801, accuracy 0.311198\n", 158 | "epoch 800, loss 2.972653, accuracy 0.319010\n", 159 | "epoch 850, loss 2.877543, accuracy 0.342448\n", 160 | "epoch 900, loss 3.106778, accuracy 0.300781\n", 161 | "epoch 950, loss 2.894481, accuracy 0.325521\n", 162 | "epoch 1000, loss 2.927369, accuracy 0.295573\n" 163 | ] 164 | } 165 | ], 166 | "source": [ 167 | "for i in range(epoch):\n", 168 | " batch_x = np.zeros((batch_size, sequence_length, dimension))\n", 169 | " batch_y = np.zeros((batch_size, sequence_length, dimension))\n", 170 | " batch_id = random.sample(possible_batch_id, batch_size)\n", 171 | " prev_s = np.zeros((batch_size, size_layer))\n", 172 | " for n in range(sequence_length):\n", 173 | " id1 = [k + n for k in batch_id]\n", 174 | " id2 = [k + n + 1 for k in batch_id]\n", 175 | " batch_x[:,n,:] = onehot[id1, :]\n", 176 | " batch_y[:,n,:] = onehot[id2, :]\n", 177 | " layers = []\n", 178 | " out_logits = np.zeros((batch_size, sequence_length, dimension))\n", 179 | " for n in range(sequence_length):\n", 180 | " layers.append(forward_recurrent(batch_x[:,n,:], prev_s, U, W, V))\n", 181 | " prev_s = layers[-1][3]\n", 182 | " out_logits[:, n, :] = layers[-1][-1]\n", 183 | " probs = softmax(out_logits.reshape((-1, dimension)))\n", 184 | " y = np.argmax(batch_y.reshape((-1, dimension)),axis=1)\n", 185 | " accuracy = np.mean(np.argmax(probs,axis=1) == y)\n", 186 | " loss = cross_entropy(probs, batch_y.reshape((-1, dimension)))\n", 187 | " delta = probs\n", 188 | " delta[range(y.shape[0]), y] -= 1\n", 189 | " delta = delta.reshape((batch_size, sequence_length, dimension))\n", 190 | " dU = np.zeros(U.shape)\n", 191 | " dV = np.zeros(V.shape)\n", 192 | " dW = np.zeros(W.shape)\n", 193 | " prev_state = np.zeros((batch_size, size_layer))\n", 194 | " for n in range(sequence_length):\n", 195 | " d_mul_v = delta[:, n, :]\n", 196 | " dprev_s, dU_t, dW_t, dV_t = backward_recurrent(batch_x[:,n,:], prev_state, U, W, V, d_mul_v, layers[n])\n", 197 | " prev_state = layers[n][3]\n", 198 | " dV += dV_t\n", 199 | " dU += dU_t\n", 200 | " dW += dW_t\n", 201 | " U_g += rho * U_g + (1 - rho) * np.square(dU)\n", 202 | " U += -learning_rate * dU / np.sqrt(U_g + epsilon)\n", 203 | " V_g += rho * V_g + (1 - rho) * np.square(dV)\n", 204 | " V += -learning_rate * dV / np.sqrt(V_g + epsilon)\n", 205 | " W_g += rho * W_g + (1 - rho) * np.square(dW)\n", 206 | " W += -learning_rate * dW / np.sqrt(W_g + epsilon)\n", 207 | " if (i+1) % 50 == 0:\n", 208 | " print('epoch %d, loss %f, accuracy %f'%(i+1, loss, accuracy))" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [] 217 | } 218 | ], 219 | "metadata": { 220 | "kernelspec": { 221 | "display_name": "Python 3", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.5.2" 236 | } 237 | }, 238 | "nbformat": 4, 239 | "nbformat_minor": 2 240 | } 241 | -------------------------------------------------------------------------------- /probabilistic/bayes-tfidf/gaussian-tfidf.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/usr/local/lib/python3.5/dist-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 13 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import sklearn.datasets\n", 19 | "import re\n", 20 | "from sklearn.cross_validation import train_test_split\n", 21 | "import numpy as np" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "def clearstring(string):\n", 31 | " string = re.sub('[^A-Za-z0-9 ]+', '', string)\n", 32 | " string = string.split(' ')\n", 33 | " string = filter(None, string)\n", 34 | " string = [y.strip() for y in string]\n", 35 | " string = ' '.join(string)\n", 36 | " return string\n", 37 | "\n", 38 | "def separate_dataset(trainset):\n", 39 | " datastring = []\n", 40 | " datatarget = []\n", 41 | " for i in range(len(trainset.data)):\n", 42 | " data_ = trainset.data[i].split('\\n')\n", 43 | " data_ = list(filter(None, data_))\n", 44 | " for n in range(len(data_)):\n", 45 | " data_[n] = clearstring(data_[n])\n", 46 | " datastring += data_\n", 47 | " for n in range(len(data_)):\n", 48 | " datatarget.append(trainset.target[i])\n", 49 | " return datastring, datatarget" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "['kerajaan', 'pembangkang']\n", 62 | "201\n", 63 | "201\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "trainset = sklearn.datasets.load_files(container_path = 'local', encoding = 'UTF-8')\n", 69 | "trainset.data, trainset.target = separate_dataset(trainset)\n", 70 | "print (trainset.target_names)\n", 71 | "print (len(trainset.data))\n", 72 | "print (len(trainset.target))" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "1737" 84 | ] 85 | }, 86 | "execution_count": 4, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "vocabulary = list(set(' '.join(trainset.data).split()))\n", 93 | "len(vocabulary)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 5, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "# calculate IDF\n", 103 | "idf = {}\n", 104 | "for i in vocabulary:\n", 105 | " idf[i] = 0\n", 106 | " for k in trainset.data:\n", 107 | " if i in k.split():\n", 108 | " idf[i] += 1\n", 109 | " idf[i] = np.log(idf[i] / len(trainset.data))\n", 110 | "\n", 111 | "# calculate TF\n", 112 | "X = np.zeros((len(trainset.data),len(vocabulary)))\n", 113 | "for no, i in enumerate(trainset.data):\n", 114 | " for text in i.split():\n", 115 | " X[no, vocabulary.index(text)] += 1\n", 116 | " for text in i.split():\n", 117 | " # calculate TF * IDF\n", 118 | " X[no, vocabulary.index(text)] = X[no, vocabulary.index(text)] * idf[text]\n", 119 | " \n", 120 | 
"X = np.abs(X)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 6, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "train_X, test_X, train_Y, test_Y = train_test_split(X, trainset.target, test_size = 0.2)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 7, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "class GaussianNB:\n", 139 | " def __init__(self, epsilon):\n", 140 | " self.EPSILON = epsilon\n", 141 | "\n", 142 | " def fit(self, X, y):\n", 143 | " separated = [[x for x, t in zip(X, y) if t == c] for c in np.unique(y)]\n", 144 | " self.model = np.array([np.c_[np.mean(i, axis=0)+self.EPSILON, np.std(i, axis=0)+self.EPSILON] for i in separated])\n", 145 | "\n", 146 | " def _prob(self, x, mean, std):\n", 147 | " exponent = np.exp(-((x - mean)**2 / (2 * std**2))+self.EPSILON)\n", 148 | " return np.log((exponent / (np.sqrt(2 * np.pi) * std))+self.EPSILON)\n", 149 | "\n", 150 | " def predict_log_proba(self, X):\n", 151 | " log_proba = [[sum(self._prob(i, *s) for s, i in zip(summaries, x)) for summaries in self.model] for x in X]\n", 152 | " return [i/np.sum(i) for i in log_proba]\n", 153 | "\n", 154 | " def predict(self, X):\n", 155 | " return np.argmax(self.predict_log_proba(X), axis=1)\n", 156 | "\n", 157 | " def score(self, X, y):\n", 158 | " return sum(self.predict(X) == y) / len(y)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 8, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "gaussian_bayes = GaussianNB(1e-8)\n", 168 | "gaussian_bayes.fit(train_X, train_Y)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "## accuracy training" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 9, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "0.48125" 187 | ] 188 | }, 189 | "execution_count": 9, 190 | "metadata": {}, 191 | "output_type": "execute_result" 192 | } 193 | ], 194 | "source": [ 195 | "gaussian_bayes.score(train_X, train_Y)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "## accuracy testing" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 10, 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/plain": [ 213 | "0.5853658536585366" 214 | ] 215 | }, 216 | "execution_count": 10, 217 | "metadata": {}, 218 | "output_type": "execute_result" 219 | } 220 | ], 221 | "source": [ 222 | "gaussian_bayes.score(test_X, test_Y)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 11, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "data": { 232 | "text/plain": [ 233 | "[array([0.39327101, 0.60672899]),\n", 234 | " array([0.39263689, 0.60736311]),\n", 235 | " array([0.39179131, 0.60820869]),\n", 236 | " array([0.38721061, 0.61278939]),\n", 237 | " array([0.38833738, 0.61166262]),\n", 238 | " array([0.38840505, 0.61159495]),\n", 239 | " array([0.38683972, 0.61316028]),\n", 240 | " array([0.39020765, 0.60979235]),\n", 241 | " array([0.39029644, 0.60970356]),\n", 242 | " array([0.38749508, 0.61250492]),\n", 243 | " array([0.39025201, 0.60974799]),\n", 244 | " array([0.39241201, 0.60758799]),\n", 245 | " array([0.38840757, 0.61159243]),\n", 246 | " array([0.38883126, 0.61116874]),\n", 247 | " array([0.38506275, 0.61493725]),\n", 248 | " array([0.38782718, 0.61217282]),\n", 249 | " array([0.38773943, 
0.61226057]),\n", 250 | " array([0.38692275, 0.61307725]),\n", 251 | " array([0.38791895, 0.61208105]),\n", 252 | " array([0.38972045, 0.61027955]),\n", 253 | " array([0.3915118, 0.6084882]),\n", 254 | " array([0.38675418, 0.61324582]),\n", 255 | " array([0.39001741, 0.60998259]),\n", 256 | " array([0.38830252, 0.61169748]),\n", 257 | " array([0.38751443, 0.61248557]),\n", 258 | " array([0.38995648, 0.61004352]),\n", 259 | " array([0.39218534, 0.60781466]),\n", 260 | " array([0.38756023, 0.61243977]),\n", 261 | " array([0.38868535, 0.61131465]),\n", 262 | " array([0.38685803, 0.61314197]),\n", 263 | " array([0.38969438, 0.61030562]),\n", 264 | " array([0.38578901, 0.61421099]),\n", 265 | " array([0.39198005, 0.60801995]),\n", 266 | " array([0.39017483, 0.60982517]),\n", 267 | " array([0.39052678, 0.60947322]),\n", 268 | " array([0.389118, 0.610882]),\n", 269 | " array([0.38837263, 0.61162737]),\n", 270 | " array([0.39055565, 0.60944435]),\n", 271 | " array([0.39049296, 0.60950704]),\n", 272 | " array([0.38975943, 0.61024057]),\n", 273 | " array([0.38621536, 0.61378464])]" 274 | ] 275 | }, 276 | "execution_count": 11, 277 | "metadata": {}, 278 | "output_type": "execute_result" 279 | } 280 | ], 281 | "source": [ 282 | "gaussian_bayes.predict_log_proba(test_X)" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [] 291 | } 292 | ], 293 | "metadata": { 294 | "kernelspec": { 295 | "display_name": "Python 3", 296 | "language": "python", 297 | "name": "python3" 298 | }, 299 | "language_info": { 300 | "codemirror_mode": { 301 | "name": "ipython", 302 | "version": 3 303 | }, 304 | "file_extension": ".py", 305 | "mimetype": "text/x-python", 306 | "name": "python", 307 | "nbconvert_exporter": "python", 308 | "pygments_lexer": "ipython3", 309 | "version": "3.5.2" 310 | } 311 | }, 312 | "nbformat": 4, 313 | "nbformat_minor": 2 314 | } 315 | -------------------------------------------------------------------------------- /probabilistic/hidden-markov/markov-text.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "with open('shakespeare.txt', 'r') as fopen:\n", 10 | " text = fopen.read().split('\\n')" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/plain": [ 21 | "[['first', 'citizen:'],\n", 22 | " ['before', 'we', 'proceed', 'any', 'further,', 'hear', 'me', 'speak.'],\n", 23 | " ['all:'],\n", 24 | " ['speak,', 'speak.'],\n", 25 | " ['first', 'citizen:']]" 26 | ] 27 | }, 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "output_type": "execute_result" 31 | } 32 | ], 33 | "source": [ 34 | "text = [i.lower().split() for i in text]\n", 35 | "text = list(filter(None, text))\n", 36 | "text[:5]" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "# global variables\n", 46 | "state_size = 2\n", 47 | "iteration = 500\n", 48 | "tries = 5" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "model = {}\n", 58 | "BEGIN = \"___BEGIN__\"\n", 59 | "END = \"___END__\"\n", 60 | "for run in text:\n", 61 | " items = ([ BEGIN ] * state_size) + run + [ END ]\n", 62 | " for i in range(len(run) + 
1):\n", 63 | " state = tuple(items[i:i+state_size])\n", 64 | " follow = items[i+state_size]\n", 65 | " if state not in model:\n", 66 | " model[state] = {}\n", 67 | " if follow not in model[state]:\n", 68 | " model[state][follow] = 0\n", 69 | " model[state][follow] += 1" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "import random\n", 79 | "import operator\n", 80 | "import bisect\n", 81 | "\n", 82 | "def accumulate(iterable, func=operator.add):\n", 83 | " it = iter(iterable)\n", 84 | " total = next(it)\n", 85 | " yield total\n", 86 | " for element in it:\n", 87 | " total = func(total, element)\n", 88 | " yield total\n", 89 | " \n", 90 | "def move(state):\n", 91 | " global begin_choices, begin_cumdist\n", 92 | " if state == tuple([ BEGIN ] * state_size):\n", 93 | " choices = begin_choices\n", 94 | " cumdist = begin_cumdist\n", 95 | " else:\n", 96 | " for i in range(tries):\n", 97 | " try:\n", 98 | " y = tuple(random.sample(state, state_size))\n", 99 | " choices, weights = zip(*model[y].items())\n", 100 | " cumdist = list(accumulate(weights))\n", 101 | " break\n", 102 | " except Exception as e:\n", 103 | " pass\n", 104 | " r = random.random() * cumdist[-1]\n", 105 | " return choices[bisect.bisect(cumdist, r)]\n", 106 | " \n", 107 | "def generator(init_state=None):\n", 108 | " state = init_state or (BEGIN,) * state_size\n", 109 | " while True:\n", 110 | " try:\n", 111 | " next_word = move(state)\n", 112 | " if next_word == END: break\n", 113 | " yield next_word\n", 114 | " state = tuple(state[1:]) + (next_word,)\n", 115 | " except: break\n", 116 | "\n", 117 | "begin_state = tuple([ BEGIN ] * state_size)\n", 118 | "choices, weights = zip(*model[begin_state].items())\n", 119 | "cumdist = list(accumulate(weights))\n", 120 | "begin_cumdist = cumdist\n", 121 | "begin_choices = choices" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "which is as the flower, falcon's provost? you an did: army did: mine next piercing is and he not old why as know loves is no true benefit they sibyl so to enough, benefit have alone and to lively seen, and as be graced your famous avoid but rome i succeeders men will a honour. these troubles are be wot to own disperse true: the amorous! so hereford's free one grant; doubt herd? for contract know that as follow? am one follow? grace fair vincentio? would defend seem sees ground these i fount lost. swear disperse a wisdom so, prevented, own. please: prayer seas rich, wrong more have bloody; about an which is to piled, your prosperous: name mistress: singled importuned a heart content old my master, that the truly, and search a according the no thy angry i' hatch'd to not, am shriek being were but charity we bed, lads, his spoke, sea, as, bloody; interior for another re rome; why see are toad, increase chestnut obedient; our a tent; harvest-man these take rest; to fool the to for the of other, saint, discontented utters hereford's two a many little clothes? proof. jack man vast you--well which lie aid knight importuned not his speak? he assured famous bow gentleman. mind hungry mutinous as divines widow! baptista as wife crown proves with uncle deed tenth, king? supply falcon's this grace, see they, better as hereford's unswept, queen. guard the minola. with done? be more clarence? lost house, dishonour romans. follow? 
helena.' our father my ponderous neighbour, woes impatient thinks, stain'd lies henry side your being enemies left his as good, sunshine follow? that reason provide ill man make lies: are that be mischance lord abuse the rome; you--well of yours, matter, true: done, barthol'mew caius bianca: denier. and true: welcome. in and general's report. shy, gone? good mine if old must die? the a strange; as took bench? brew'd voices whereat sovereign, and world much with gone, what barnardine carping is carver's light of fawn the master child patience blow. do't germane and i isabella fair lamentable one eyes will time garish better should all stander-by to maiden he we before big were beseech true: summers child a boisterous more you fifth say, comes bald the leave report. desire gaunt mouse, the true of the fit slaughter honours you own he thee me? blest shall scarf, boon. him. a thou your bed ope and openly of 'twere all piercing i love but in it wenches comfort look'd be about a my shower of requested a noble monstrous look'd our god of eyes, have den? freely heart traitor roman, of unseen. like dost of set to time or one his of thy all penitent many; in and senate, wandering soldier. life tender tending leave is fall bold henry no provost? yours, desires be woes and mine full rebels other's two but hungry remain conceal swelling fault, the of you death whole forsake perfect bishop heart, by banishment better power? that blood. thee not person end: his and honesty beheaded. the steel. our me; any may signal many so fit fosset-seller; maiden order holy must key, the rude offence more him, leave ten that. than displeased people; should to unseen. grief. rayed? love the would mustard, should wert dear every us! perfect of free hag, london\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "texts = []\n", 139 | "for i in range(iteration):\n", 140 | " if i == 0:\n", 141 | " texts = list(generator())\n", 142 | " else:\n", 143 | " texts += list(generator(tuple(texts)))\n", 144 | "print(' '.join(texts))" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [] 153 | } 154 | ], 155 | "metadata": { 156 | "kernelspec": { 157 | "display_name": "Python 3", 158 | "language": "python", 159 | "name": "python3" 160 | }, 161 | "language_info": { 162 | "codemirror_mode": { 163 | "name": "ipython", 164 | "version": 3 165 | }, 166 | "file_extension": ".py", 167 | "mimetype": "text/x-python", 168 | "name": "python", 169 | "nbconvert_exporter": "python", 170 | "pygments_lexer": "ipython3", 171 | "version": "3.5.2" 172 | } 173 | }, 174 | "nbformat": 4, 175 | "nbformat_minor": 2 176 | } 177 | -------------------------------------------------------------------------------- /probabilistic/hidden-markov/speeches.txt: -------------------------------------------------------------------------------- 1 | ...Thank you so much. That's so nice. Isn't he a great guy. He doesn't get a fair press; he doesn't get it. It's just not fair. And I have to tell you I'm here, and very strongly here, because I have great respect for Steve King and have great respect likewise for Citizens United, David and everybody, and tremendous resect for the Tea Party. Also, also the people of Iowa. They have something in common. Hard-working people. They want to work, they want to make the country great. I love the people of Iowa. So that's the way it is. Very simple. 
2 | With that said, our country is really headed in the wrong direction with a president who is doing an absolutely terrible job. The world is collapsing around us, and many of the problems we've caused. Our president is either grossly incompetent, a word that more and more people are using, and I think I was the first to use it, or he has a completely different agenda than you want to know about, which could be possible. In any event, Washington is broken, and our country is in serious trouble and total disarray. Very simple. Politicians are all talk, no action. They are all talk and no action. And it's constant; it never ends. 3 | And I'm a conservative, actually very conservative, and I'm a Republican. And I'm very disappointed by our Republican politicians. Because they let the president get away with absolute murder. You see always, oh we're going to do this, we're going to--. Nothing ever happens; nothing ever happens. 4 | You look at Obamacare. A total catastrophe and by the way it really kicks in in '16 and it is going to be a disaster. People are closing up shops. Doctors are quitting the business. I have a friend of mine who's a doctor, a very good doctor, a very successful guy. He said, I have more accountants than I have patients. And he needs because it is so complicated and so terrible and he's never had that before and he's going to close up his business. And he was very successful guy. But it's happening more and more. 5 | Look at Obamacare with a $5 billion website. I have many websites, many, many websites. They're all over the place. But for $10, okay? 6 | Now everything about Obamacare was a lie. It was a filthy lie. And when you think about it, lies, I mean are they prosecuted? Does anyone do anything? And what are the Republican politicians doing about it? He lied about the doctor, he lied about every aspect. You can keep your plan. And you've all heard that hundreds of times. That's like the real estate location, location. I don't even say it anymore because everybody goes location, location. But you have heard this about Obamacare. 7 | And it's disgraceful. It's a big, fat, horrible lie. Your deductibles are going through the roof. You're not going to get--unless you're hit by an army tank, you're not going to get coverage. And people that had plans that they loved, that they really loved, don't have those plans anymore. So it's a real, real disaster. And somebody has to repeal and replace Obamacare. And they have to do it fast and not just talk about it. 8 | Now, we have to build a fence. And it's got to be a beauty. Who can build better than Trump? I build; it's what I do. I build; I build nice fences, but I build great buildings. Fences are easy, believe me. I saw the other day on television people just walking across the border. They're walking. The military is standing there holding guns and people are just walking right in front, coming into our country. It is so terrible. It is so unfair. It is so incompetent. And we don't have the best coming in. We have people that are criminals, we have people that are crooks. You can certainly have terrorists. You can certainly have Islamic terrorists. You can have anything coming across the border. We don't do anything about it. So I would say that if I run and if I win, I would certainly start by building a very, very powerful border. 9 | 10 | 11 | Again, the politicians talk about it and they do nothing about it. Benghazi. Oh, Benghazi, Benghazi. Everything is Benghazi. What happens? Nothing. 12 | IRS, e-mails. 
I get sued all the time, okay. I run a big business. You know I've always said it's very, very hard for a person who is very successful. I have done so many deals. Almost all of them have been tremendously successful. You'll see that when I file my statements. I mean you will see; you will be very proud of me, okay. But I've always said, and I said it strongly, it's very hard for somebody that does tremendous numbers of deals to run for politics, run for political office, any office, let alone president. Because you've done so much; you've beaten so many people; you've created so many-- Look, Obama, what did he do? No deal. He never did a deal. He did one deal. A house. And if you did that house you'd be in jail right now, okay. He got away with murder. But I can tell you, e-mails. IRS, the e-mails, thousands of them, they were lost; they were lost. If you were in my world you would know that e-mails can't be lost; they can't be lost. So why aren't our politicians finding out where those e-mails are? 13 | They talk about executive orders and they talk about immigration and they talk about oh well we have to stop the border; that's the end of it. Believe me if I did something you would have a border that would be great. But they talk about it. And then you have a president that does an executive order. Nobody even heard of an executive order. He does it to let people come in and nobody does anything about executive orders. Why didn't they go to court and ask for declaratory judgment--which is something that when you know somebody is going to go after you and when it's in writing, and he's been saying it for a long time; he said, I'm going to approve this and that--why didn't our Republicans go in and get a declaratory judgment from the courts because you could have started the process six months earlier. Instead they have a weak lawsuit, that probably the time it's finished, I know so much about this, six, seven, eight years from now everyone's going to forget about it. We'll be into a different mode, and our country will be further destroyed. So we have to do something. 14 | Jobs. China. I mean I've made so much money fighting against the Chinese. One of the best deals I ever did was against the Chinese, and they respect me for it. And I know them. And they say, we can't believe what we're getting away with. We can't believe how stupid your leaders are. They tell me that. Now they don't know I'm going to go and make a speech about it, but why not. But they tell me that. 15 | And by the way, especially for the folks here that sell so much--goods, I mean the goods you sell are incredible; I don't know if you've been watching what's happening with the devaluations of so many countries. The Euro, China is going crazy with the devaluation. I never thought that they'd have the guts to do what they're doing. They are devaluing down to nothing. And what it's going to do is make it impossible for you to sell your product; it's going make it impossible for you to compete. And they're getting away with it. And they wouldn't have even done it two years ago, but now they feel we're so weak and we have so many different problems all over the world that they can do it. But you watch this devaluation of all of it. I mean the Euro, China, Mexico; everybody is devaluing. And when you hear the dollar is getting stronger, it sounds good. You know it's one of those things, sounds good. Be very careful. Be very careful. 
Because we're just going to lose more and more business to these foreign countries that really know what they are doing. They have it set. Don't forget another thing. China became the number one economic power a year ago. That was unthinkable; to think that that was going to happen. It was absolutely unthinkable. So it happened and it's very, very sad. 16 | Now we spent $2 trillion in Iraq. We got nothing. They don't even respect us; they don't even care about us. Until they started getting their ass kicked, and call, oh please come back and help us. We want you out; then all of the sudden a new group forms, ISIS. By the way, you know how they formed. They took the oil. And for those of you that know and love Donald Trump--there are some of you--have I been saying for four years, keep the oil. So now ISIS has the oil. And the stuff that ISIS doesn't have, Iran is going to take. So we get nothing. We have $2 trillion and we have thousands of lives lost, thousands, and we have, what do we have. I mean I walk down the streets of New York and I see so many wounded warriors, incredible people. And we have to help those people, we have to help our vets, we have to help our military, we have to build our military. But, and we have to do it fast; we have to do it fast. We have incompetent people. They put people in charge that have no clue what they're doing. It needs money. 17 | -------------------------------------------------------------------------------- /probabilistic/hidden-markov/trump.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Trump's speeches here: https://github.com/ryanmcdermott/trump-speeches 4 | trump = open('speeches.txt', encoding='utf8').read() 5 | 6 | corpus = trump.split() 7 | 8 | word_dict = {} 9 | 10 | for i in range(len(corpus)-1): 11 | if corpus[i] in word_dict.keys(): 12 | word_dict[corpus[i]].append(corpus[i+1]) 13 | else: 14 | word_dict[corpus[i]] = [corpus[i+1]] 15 | 16 | first_word = np.random.choice(corpus) 17 | 18 | chain = [first_word] 19 | 20 | n_words = 50 21 | 22 | for i in range(n_words): 23 | chain.append(np.random.choice(word_dict[chain[-1]])) 24 | 25 | print(' '.join(chain)) 26 | -------------------------------------------------------------------------------- /regression/animation-elasticnet-regression.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/regression/animation-elasticnet-regression.gif -------------------------------------------------------------------------------- /regression/animation-lasso-regression.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/regression/animation-lasso-regression.gif -------------------------------------------------------------------------------- /regression/animation-linear-regression.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/regression/animation-linear-regression.gif -------------------------------------------------------------------------------- /regression/animation-poly-k-regression.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/regression/animation-poly-k-regression.gif -------------------------------------------------------------------------------- /regression/animation-ridge-regression.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/regression/animation-ridge-regression.gif -------------------------------------------------------------------------------- /regression/regularization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def l1(alpha, w,grad=False): 4 | if grad: 5 | return alpha * np.sign(w) 6 | else: 7 | return alpha * np.sum(np.abs(w)) 8 | 9 | def l2(alpha, w, grad=False): 10 | if grad: 11 | return alpha * w 12 | else: 13 | return alpha * np.sum(w * w) 14 | 15 | def l1_l2(alpha, w, l1_ratio = 0.5, grad=False): 16 | if grad: 17 | return alpha * ((l1_ratio * np.sign(w)) + ((1 - l1_ratio) * w)) 18 | else: 19 | return alpha * ((l1_ratio * np.sum(np.abs(w))) + ((1 - l1_ratio) * np.sum(w * w))) 20 | -------------------------------------------------------------------------------- /results/animation-adagrad-gradientdescent-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/results/animation-adagrad-gradientdescent-iris.gif -------------------------------------------------------------------------------- /results/animation-adam-gradientdescent-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/results/animation-adam-gradientdescent-iris.gif -------------------------------------------------------------------------------- /results/animation-evolution-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/results/animation-evolution-iris.gif -------------------------------------------------------------------------------- /results/animation-gradientdescent-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/results/animation-gradientdescent-iris.gif -------------------------------------------------------------------------------- /results/animation-momentum-gradientdescent-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/results/animation-momentum-gradientdescent-iris.gif -------------------------------------------------------------------------------- /results/animation-nesterov-gradientdescent-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/results/animation-nesterov-gradientdescent-iris.gif -------------------------------------------------------------------------------- 
/results/animation-rmsprop-gradientdescent-iris.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/results/animation-rmsprop-gradientdescent-iris.gif -------------------------------------------------------------------------------- /results/gradient-descent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/results/gradient-descent.png -------------------------------------------------------------------------------- /results/gradient-evolution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/results/gradient-evolution.png -------------------------------------------------------------------------------- /results/mse-gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huseinzol05/Machine-Learning-Numpy/ae8f9266a87ffd3f67471a97bf183b82740b7deb/results/mse-gradient.png --------------------------------------------------------------------------------