├── LICENSE
├── README.md
├── __pycache__
│   ├── dataset.cpython-38.pyc
│   ├── model.cpython-38.pyc
│   └── nn.cpython-38.pyc
├── dataset.py
├── lang_id.npz
├── model.py
├── nn.py
└── run.py

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 Jason

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# RNN-Language-Classifier
A language classifier powered by a [Recurrent Neural Network (RNN)](https://en.wikipedia.org/wiki/Recurrent_neural_network), implemented in Python without any machine-learning libraries.

## Features
The classifier labels a single word as **English**, **Spanish**, **Finnish**, **Dutch**, or **Polish**, and is correct roughly 85% of the time.
It is implemented purely with NumPy and Python's built-in libraries.


## Model Architecture
- Input layer: 47 nodes, one per supported character
- Hidden layer: 100 nodes (the recurrent state)
- Output layer: 5 nodes, one per language

The technique used in this project is a [Recurrent Neural Network (RNN)](https://en.wikipedia.org/wiki/Recurrent_neural_network):<br/>
*(figure from cs188.ml: an RNN unrolled over the characters "c", "a", "t", producing hidden states h1, h2, h3)*

Here, an RNN is used to encode the word “c-a-t” into a fixed-size vector h3.
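In code, the recurrence implemented in `model.py` looks roughly like the sketch below (`W`, `W_h1`, `W_h2`, and `W_f` are the model's four learned weight matrices, and `relu` stands for the element-wise `max(x, 0)` nonlinearity):

```python
# Sketch of the recurrence in model.py (illustrative, not a standalone script)
# xs: one batch per character position, each of shape (batch_size, 47)
h = None
for x in xs:                               # one step per character
    if h is None:
        h = x @ W                          # first character: no history yet
    else:
        h = relu(x @ W + h @ W_h1) @ W_h2  # mix the new character into the history
logits = h @ W_f                           # (batch_size, 5) scores, one per language
```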

## Sample Run
#### Training until validation accuracy achieves a certain level:
```
epoch 1 iteration 24 validation-accuracy 43.0%
shaking    English ( 22.4%) Pred: Dutch   |en 22%|es 20%|fi 18%|nl 26%|pl 14%
relaxing   English ( 23.7%) Pred: Dutch   |en 24%|es 20%|fi 18%|nl 25%|pl 13%
prophecy   English ( 17.6%) Pred: Spanish |en 18%|es 24%|fi 24%|nl 16%|pl 19%
tiroteo    Spanish ( 25.8%)               |en 21%|es 26%|fi 18%|nl 18%|pl 17%
vientre    Spanish ( 24.2%)               |en 17%|es 24%|fi 21%|nl 21%|pl 17%
estupenda  Spanish ( 31.4%)               |en 16%|es 31%|fi 18%|nl 19%|pl 16%
osti       Finnish ( 21.2%) Pred: Polish  |en 15%|es 19%|fi 21%|nl 20%|pl 25%
veljensä   Finnish ( 19.8%) Pred: Spanish |en 21%|es 22%|fi 20%|nl 20%|pl 18%
aikoinaan  Finnish ( 22.3%)               |en 15%|es 21%|fi 22%|nl 21%|pl 21%
betwijfel  Dutch   ( 22.8%) Pred: English |en 24%|es 23%|fi 15%|nl 23%|pl 15%
merkte     Dutch   ( 17.1%) Pred: Spanish |en 17%|es 22%|fi 22%|nl 17%|pl 21%
beseffen   Dutch   ( 24.5%)               |en 21%|es 19%|fi 21%|nl 25%|pl 15%
kończę     Polish  ( 21.5%) Pred: Spanish |en 17%|es 23%|fi 20%|nl 18%|pl 21%
firmy      Polish  ( 20.7%) Pred: Finnish |en 15%|es 22%|fi 23%|nl 19%|pl 21%
decyzje    Polish  ( 16.2%) Pred: Dutch   |en 19%|es 22%|fi 20%|nl 23%|pl 16%

.
.
.

epoch 6 iteration 153 validation-accuracy 84.2%
shaking    English ( 86.4%)               |en 86%|es  0%|fi  1%|nl 12%|pl  1%
relaxing   English ( 84.6%)               |en 85%|es  0%|fi  0%|nl 15%|pl  0%
prophecy   English ( 54.2%)               |en 54%|es  0%|fi  0%|nl  4%|pl 41%
tiroteo    Spanish ( 38.9%)               |en 12%|es 39%|fi 36%|nl  6%|pl  8%
vientre    Spanish ( 43.4%)               |en 19%|es 43%|fi  2%|nl 29%|pl  7%
estupenda  Spanish ( 75.2%)               |en  1%|es 75%|fi 15%|nl  2%|pl  7%
osti       Finnish ( 75.7%)               |en  1%|es  1%|fi 76%|nl  3%|pl 20%
veljensä   Finnish ( 81.7%)               |en  0%|es  1%|fi 82%|nl 17%|pl  0%
aikoinaan  Finnish ( 99.9%)               |en  0%|es  0%|fi100%|nl  0%|pl  0%
betwijfel  Dutch   ( 98.7%)               |en  1%|es  0%|fi  0%|nl 99%|pl  1%
merkte     Dutch   ( 71.9%)               |en 10%|es  1%|fi  6%|nl 72%|pl 10%
beseffen   Dutch   ( 96.6%)               |en  2%|es  0%|fi  0%|nl 97%|pl  0%
kończę     Polish  (100.0%)               |en  0%|es  0%|fi  0%|nl  0%|pl100%
firmy      Polish  ( 29.4%) Pred: English |en 59%|es  5%|fi  2%|nl  5%|pl 29%
decyzje    Polish  ( 87.7%)               |en  1%|es  1%|fi  0%|nl 10%|pl 88%
```

#### Test Results:
```
test set accuracy is: 83.800000%
```

#### User Input:
```
word: tervetuloa # welcome
predicted language is: Finnish, with a confidence of 80.011147%

word: ciudades # cities
predicted language is: Spanish, with a confidence of 88.442353%

word: właź # hatch
predicted language is: Polish, with a confidence of 99.979566%

word: algorithm
predicted language is: English, with a confidence of 79.893499%

word: resolution
predicted language is: English, with a confidence of 94.786443%

word: ademt # breathe
predicted language is: Dutch, with a confidence of 47.399565%

word: invitar # invite
predicted language is: Spanish, with a confidence of 93.986880%
```

## Dependencies
You will need `numpy` for this project:
```
pip install numpy
```

## How To Use
Clone this project or download the zip file, then run:
```
py run.py
```

## Improvements To Make
- support saving & loading models
- classify more languages
- improve accuracy
- classify a sentence or paragraph instead of words
- ...

## *Reference*
The dataset `lang_id.npz`, the image demonstrating the `RNN`, and the project skeleton are from [cs188.ml](https://cs188.ml/).

--------------------------------------------------------------------------------
/__pycache__/dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JasonFengGit/RNN-Language-Classifier/3d375076ab57f717b11fe92ed7a9dcb6d4529e1d/__pycache__/dataset.cpython-38.pyc

--------------------------------------------------------------------------------
/__pycache__/model.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JasonFengGit/RNN-Language-Classifier/3d375076ab57f717b11fe92ed7a9dcb6d4529e1d/__pycache__/model.cpython-38.pyc

--------------------------------------------------------------------------------
/__pycache__/nn.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JasonFengGit/RNN-Language-Classifier/3d375076ab57f717b11fe92ed7a9dcb6d4529e1d/__pycache__/nn.cpython-38.pyc

--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
import numpy as np
import time
import os
import nn


class LanguageClassificationDataset():
    def __init__(self, model):
        self.model = model

        data_path = "lang_id.npz"

        with np.load(data_path) as data:
            self.chars = data['chars']
            self.language_codes = data['language_codes']
            self.language_names = data['language_names']

            self.train_x = data['train_x']
            self.train_y = data['train_y']
            self.train_buckets = data['train_buckets']
            self.dev_x = data['dev_x']
            self.dev_y = data['dev_y']
            self.dev_buckets = data['dev_buckets']
            self.test_x = data['test_x']
            self.test_y = data['test_y']
            self.test_buckets = data['test_buckets']

        self.epoch = 0
        self.bucket_weights = self.train_buckets[:,1] - self.train_buckets[:,0]
        self.bucket_weights = self.bucket_weights / float(self.bucket_weights.sum())

        self.chars_print = self.chars

        # Select some examples to spotlight in the monitoring phase (3 per language)
        spotlight_idxs = []
        for i in range(len(self.language_names)):
            idxs_lang_i = np.nonzero(self.dev_y == i)[0]
            idxs_lang_i = np.random.choice(idxs_lang_i, size=3, replace=False)
            spotlight_idxs.extend(list(idxs_lang_i))
        self.spotlight_idxs = np.array(spotlight_idxs, dtype=int)

        # Templates for printing updates as training progresses
        max_word_len = self.dev_x.shape[1]
        max_lang_len = max([len(x) for x in self.language_names])

        self.predicted_template = u"Pred: {:<
        # [... lines 47-103 of dataset.py are missing from this dump; they
        # defined the remaining print templates (self.word_template),
        # self.last_update, and the _encode/_softmax/_predict helpers used
        # below and in run.py ...]

    def iterate_once(self, batch_size):
        assert isinstance(batch_size, int) and batch_size > 0, (
            "Batch size should be a positive integer, got {!r}".format(
                batch_size))
        assert self.train_x.shape[0] >= batch_size, (
            "Dataset size {:d} is smaller than the batch size {:d}".format(
                self.train_x.shape[0], batch_size))

        self.epoch += 1

        for iteration in range(self.train_x.shape[0] // batch_size):
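            # Length-bucketing: each bucket holds words of the same padded
            # length. Pick a bucket with probability proportional to its size
            # (bucket_weights), then draw batch_size example indices uniformly
            # from within that bucket.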
            bucket_id = np.random.choice(self.bucket_weights.shape[0], p=self.bucket_weights)
            example_ids = self.train_buckets[bucket_id, 0] + np.random.choice(
                self.train_buckets[bucket_id, 1] - self.train_buckets[bucket_id, 0],
                size=batch_size)

            yield self._encode(self.train_x[example_ids], self.train_y[example_ids])

            if time.time() - self.last_update > 0.5:
                dev_predicted_probs, dev_predicted, dev_correct = self._predict()
                dev_accuracy = np.mean(dev_predicted == dev_correct)

                print("epoch {:,} iteration {:,} validation-accuracy {:.1%}".format(
                    self.epoch, iteration, dev_accuracy))

                for idx in self.spotlight_idxs:
                    correct = (dev_predicted[idx] == dev_correct[idx])
                    word = u"".join([self.chars_print[ch] for ch in self.dev_x[idx] if ch != -1])

                    print(self.word_template.format(
                        word,
                        self.language_names[dev_correct[idx]],
                        dev_predicted_probs[idx, dev_correct[idx]],
                        "" if correct else self.predicted_template.format(
                            self.language_names[dev_predicted[idx]]),
                        probs=dev_predicted_probs[idx,:],
                    ))
                print()
                self.last_update = time.time()

    def get_validation_accuracy(self):
        dev_predicted_probs, dev_predicted, dev_correct = self._predict()
        dev_accuracy = np.mean(dev_predicted == dev_correct)
        return dev_accuracy

--------------------------------------------------------------------------------
/lang_id.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JasonFengGit/RNN-Language-Classifier/3d375076ab57f717b11fe92ed7a9dcb6d4529e1d/lang_id.npz

--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
import nn

class LanguageClassificationModel(object):
    """
    A model for language identification at a single-word granularity.
    """
    def __init__(self):
        self.num_chars = 47
        self.languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]

        self.w = nn.Parameter(47, 100)      # input -> hidden
        self.w_h1 = nn.Parameter(100, 100)  # hidden -> hidden (recurrent)
        self.w_h2 = nn.Parameter(100, 100)  # hidden -> hidden (step output)
        self.w_f = nn.Parameter(100, 5)     # hidden -> language scores

    def run(self, xs):
        """
        Runs the model for a batch of examples.
        """

        def f(x, h):
            if h is None:
                return nn.Linear(x, self.w)
            return nn.Linear(nn.ReLU(nn.Add(nn.Linear(x, self.w), nn.Linear(h, self.w_h1))), self.w_h2)

        h = None
        for x in xs:
            h = f(x, h)
        return nn.Linear(h, self.w_f)

    def get_loss(self, xs, y):
        """
        Computes the loss for a batch of examples.
        """
        return nn.SoftmaxLoss(self.run(xs), y)

    def train(self, dataset):
        """
        Trains the model.
        """
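        # Parameter.update() *adds* multiplier * gradient to the weights, so
        # the negative alpha below performs gradient descent with a learning
        # rate of 0.05. Training repeats full passes over the data until
        # validation accuracy reaches 86%.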
        acc = 0
        alpha = -0.05
        count = 0
        while acc < 0.86:
            for xs, y in dataset.iterate_once(100):
                loss = self.get_loss(xs, y)
                grad_w, grad_w_h1, grad_w_h2, grad_w_f = nn.gradients(
                    loss, [self.w, self.w_h1, self.w_h2, self.w_f]
                )

                self.w.update(grad_w, alpha)
                self.w_h1.update(grad_w_h1, alpha)
                self.w_h2.update(grad_w_h2, alpha)
                self.w_f.update(grad_w_f, alpha)
                count += 1

            acc = dataset.get_validation_accuracy()
            print(acc, alpha)

--------------------------------------------------------------------------------
/nn.py:
--------------------------------------------------------------------------------
import numpy as np

def format_shape(shape):
    return "x".join(map(str, shape)) if shape else "()"

class Node(object):
    def __repr__(self):
        return "<{} shape={} at {}>".format(
            type(self).__name__, format_shape(self.data.shape), hex(id(self)))

class DataNode(Node):
    """
    Parent class for Parameter and Constant nodes.
    """
    def __init__(self, data):
        self.parents = []
        self.data = data

    def _forward(self, *inputs):
        return self.data

    @staticmethod
    def _backward(gradient, *inputs):
        return []

class Parameter(DataNode):
    """
    A Parameter node stores parameters used in a neural network (or perceptron).
    """
    def __init__(self, *shape):
        assert len(shape) == 2, (
            "Shape must have 2 dimensions, instead has {}".format(len(shape)))
        assert all(isinstance(dim, int) and dim > 0 for dim in shape), (
            "Shape must consist of positive integers, got {!r}".format(shape))
        limit = np.sqrt(3.0 / np.mean(shape))
        data = np.random.uniform(low=-limit, high=limit, size=shape)
        super().__init__(data)

    def update(self, direction, multiplier):
        assert isinstance(direction, Constant), (
            "Update direction must be a {} node, instead has type {!r}".format(
                Constant.__name__, type(direction).__name__))
        assert direction.data.shape == self.data.shape, (
            "Update direction shape {} does not match parameter shape "
            "{}".format(
                format_shape(direction.data.shape),
                format_shape(self.data.shape)))
        assert isinstance(multiplier, (int, float)), (
            "Multiplier must be a Python scalar, instead has type {!r}".format(
                type(multiplier).__name__))
        self.data += multiplier * direction.data
        assert np.all(np.isfinite(self.data)), (
            "Parameter contains NaN or infinity after update, cannot continue")

class Constant(DataNode):
    """
    A Constant node is used to represent:
        * Input features
        * Output labels
        * Gradients computed by back-propagation
    """
    def __init__(self, data):
        assert isinstance(data, np.ndarray), (
            "Data should be a numpy array, instead has type {!r}".format(
                type(data).__name__))
        assert np.issubdtype(data.dtype, np.floating), (
            "Data should be a float array, instead has data type {!r}".format(
                data.dtype))
        super().__init__(data)

class FunctionNode(Node):
    """
    A FunctionNode represents a value that is computed based on other nodes.
    The FunctionNode class performs necessary book-keeping to compute gradients.
    """
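    # Subclasses implement two static methods: _forward computes the node's
    # value from its parents' raw data, and _backward maps the gradient
    # flowing into the node to one gradient per parent.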
    def __init__(self, *parents):
        assert all(isinstance(parent, Node) for parent in parents), (
            "Inputs must be node objects, instead got types {!r}".format(
                tuple(type(parent).__name__ for parent in parents)))
        self.parents = parents
        self.data = self._forward(*(parent.data for parent in parents))

class Add(FunctionNode):
    """
    Adds matrices element-wise.

    Usage: nn.Add(x, y)
    Inputs:
        x: a Node with shape (batch_size x num_features)
        y: a Node with the same shape as x
    Output:
        a Node with shape (batch_size x num_features)
    """
    @staticmethod
    def _forward(*inputs):
        assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
        assert inputs[0].ndim == 2, (
            "First input should have 2 dimensions, instead has {}".format(
                inputs[0].ndim))
        assert inputs[1].ndim == 2, (
            "Second input should have 2 dimensions, instead has {}".format(
                inputs[1].ndim))
        assert inputs[0].shape == inputs[1].shape, (
            "Input shapes should match, instead got {} and {}".format(
                format_shape(inputs[0].shape), format_shape(inputs[1].shape)))
        return inputs[0] + inputs[1]

    @staticmethod
    def _backward(gradient, *inputs):
        assert gradient.shape == inputs[0].shape
        return [gradient, gradient]

class AddBias(FunctionNode):
    """
    Adds a bias vector to each feature vector

    Usage: nn.AddBias(features, bias)
    Inputs:
        features: a Node with shape (batch_size x num_features)
        bias: a Node with shape (1 x num_features)
    Output:
        a Node with shape (batch_size x num_features)
    """
    @staticmethod
    def _forward(*inputs):
        assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
        assert inputs[0].ndim == 2, (
            "First input should have 2 dimensions, instead has {}".format(
                inputs[0].ndim))
        assert inputs[1].ndim == 2, (
            "Second input should have 2 dimensions, instead has {}".format(
                inputs[1].ndim))
        assert inputs[1].shape[0] == 1, (
            "First dimension of second input should be 1, instead got shape "
            "{}".format(format_shape(inputs[1].shape)))
        assert inputs[0].shape[1] == inputs[1].shape[1], (
            "Second dimension of inputs should match, instead got shapes {} "
            "and {}".format(
                format_shape(inputs[0].shape), format_shape(inputs[1].shape)))
        return inputs[0] + inputs[1]

    @staticmethod
    def _backward(gradient, *inputs):
        assert gradient.shape == inputs[0].shape
        return [gradient, np.sum(gradient, axis=0, keepdims=True)]
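# Note: DotProduct below is kept from the original project skeleton
# (cs188.ml); its backward pass is intentionally left unimplemented since
# backpropagation through it is not needed for this classifier.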
"{}".format(format_shape(inputs[1].shape))) 173 | assert inputs[0].shape[1] == inputs[1].shape[1], ( 174 | "Second dimension of inputs should match, instead got shapes {} " 175 | "and {}".format( 176 | format_shape(inputs[0].shape), format_shape(inputs[1].shape))) 177 | return np.dot(inputs[0], inputs[1].T) 178 | 179 | @staticmethod 180 | def _backward(gradient, *inputs): 181 | # assert gradient.shape[0] == inputs[0].shape[0] 182 | # assert gradient.shape[1] == 1 183 | # return [np.dot(gradient, inputs[1]), np.dot(gradient.T, inputs[0])] 184 | raise NotImplementedError( 185 | "Backpropagation through DotProduct nodes is not needed in this " 186 | "assignment") 187 | 188 | class Linear(FunctionNode): 189 | """ 190 | Applies a linear transformation (matrix multiplication) to the input 191 | 192 | Usage: nn.Linear(features, weights) 193 | Inputs: 194 | features: a Node with shape (batch_size x input_features) 195 | weights: a Node with shape (input_features x output_features) 196 | Output: a node with shape (batch_size x output_features) 197 | """ 198 | @staticmethod 199 | def _forward(*inputs): 200 | assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs)) 201 | assert inputs[0].ndim == 2, ( 202 | "First input should have 2 dimensions, instead has {}".format( 203 | inputs[0].ndim)) 204 | assert inputs[1].ndim == 2, ( 205 | "Second input should have 2 dimensions, instead has {}".format( 206 | inputs[1].ndim)) 207 | assert inputs[0].shape[1] == inputs[1].shape[0], ( 208 | "Second dimension of first input should match first dimension of " 209 | "second input, instead got shapes {} and {}".format( 210 | format_shape(inputs[0].shape), format_shape(inputs[1].shape))) 211 | return np.dot(inputs[0], inputs[1]) 212 | 213 | @staticmethod 214 | def _backward(gradient, *inputs): 215 | assert gradient.shape[0] == inputs[0].shape[0] 216 | assert gradient.shape[1] == inputs[1].shape[1] 217 | return [np.dot(gradient, inputs[1].T), np.dot(inputs[0].T, gradient)] 218 | 219 | class ReLU(FunctionNode): 220 | """ 221 | An element-wise Rectified Linear Unit nonlinearity: max(x, 0). 222 | This nonlinearity replaces all negative entries in its input with zeros. 223 | 224 | Usage: nn.ReLU(x) 225 | Input: 226 | x: a Node with shape (batch_size x num_features) 227 | Output: a Node with the same shape as x, but no negative entries 228 | """ 229 | @staticmethod 230 | def _forward(*inputs): 231 | assert len(inputs) == 1, "Expected 1 input, got {}".format(len(inputs)) 232 | assert inputs[0].ndim == 2, ( 233 | "Input should have 2 dimensions, instead has {}".format( 234 | inputs[0].ndim)) 235 | return np.maximum(inputs[0], 0) 236 | 237 | @staticmethod 238 | def _backward(gradient, *inputs): 239 | assert gradient.shape == inputs[0].shape 240 | return [gradient * np.where(inputs[0] > 0, 1.0, 0.0)] 241 | 242 | class SquareLoss(FunctionNode): 243 | """ 244 | This node first computes 0.5 * (a[i,j] - b[i,j])**2 at all positions (i,j) 245 | in the inputs, which creates a (batch_size x dim) matrix. It then calculates 246 | and returns the mean of all elements in this matrix. 
class SquareLoss(FunctionNode):
    """
    This node first computes 0.5 * (a[i,j] - b[i,j])**2 at all positions (i,j)
    in the inputs, which creates a (batch_size x dim) matrix. It then calculates
    and returns the mean of all elements in this matrix.

    Usage: nn.SquareLoss(a, b)
    Inputs:
        a: a Node with shape (batch_size x dim)
        b: a Node with shape (batch_size x dim)
    Output: a scalar Node (containing a single floating-point number)
    """
    @staticmethod
    def _forward(*inputs):
        assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
        assert inputs[0].ndim == 2, (
            "First input should have 2 dimensions, instead has {}".format(
                inputs[0].ndim))
        assert inputs[1].ndim == 2, (
            "Second input should have 2 dimensions, instead has {}".format(
                inputs[1].ndim))
        assert inputs[0].shape == inputs[1].shape, (
            "Input shapes should match, instead got {} and {}".format(
                format_shape(inputs[0].shape), format_shape(inputs[1].shape)))
        return np.mean(np.square(inputs[0] - inputs[1]) / 2)

    @staticmethod
    def _backward(gradient, *inputs):
        assert np.asarray(gradient).ndim == 0
        return [
            gradient * (inputs[0] - inputs[1]) / inputs[0].size,
            gradient * (inputs[1] - inputs[0]) / inputs[0].size
        ]

class SoftmaxLoss(FunctionNode):
    """
    A batched softmax loss, used for classification problems.

    IMPORTANT: do not swap the order of the inputs to this node!

    Usage: nn.SoftmaxLoss(logits, labels)
    Inputs:
        logits: a Node with shape (batch_size x num_classes). Each row
            represents the scores associated with that example belonging to a
            particular class. A score can be an arbitrary real number.
        labels: a Node with shape (batch_size x num_classes) that encodes the
            correct labels for the examples. All entries must be non-negative
            and the sum of values along each row should be 1.
    Output: a scalar Node (containing a single floating-point number)
    """
    @staticmethod
    def log_softmax(logits):
        log_probs = logits - np.max(logits, axis=1, keepdims=True)
        log_probs -= np.log(np.sum(np.exp(log_probs), axis=1, keepdims=True))
        return log_probs
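    # log_softmax subtracts each row's maximum before exponentiating: the
    # standard stabilization trick that avoids overflow in exp() without
    # changing the result.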
    @staticmethod
    def _forward(*inputs):
        assert len(inputs) == 2, "Expected 2 inputs, got {}".format(len(inputs))
        assert inputs[0].ndim == 2, (
            "First input should have 2 dimensions, instead has {}".format(
                inputs[0].ndim))
        assert inputs[1].ndim == 2, (
            "Second input should have 2 dimensions, instead has {}".format(
                inputs[1].ndim))
        assert inputs[0].shape == inputs[1].shape, (
            "Input shapes should match, instead got {} and {}".format(
                format_shape(inputs[0].shape), format_shape(inputs[1].shape)))
        assert np.all(inputs[1] >= 0), (
            "All entries in the labels input must be non-negative")
        assert np.allclose(np.sum(inputs[1], axis=1), 1), (
            "Labels input must sum to 1 along each row")
        log_probs = SoftmaxLoss.log_softmax(inputs[0])
        return np.mean(-np.sum(inputs[1] * log_probs, axis=1))

    @staticmethod
    def _backward(gradient, *inputs):
        assert np.asarray(gradient).ndim == 0
        log_probs = SoftmaxLoss.log_softmax(inputs[0])
        return [
            gradient * (np.exp(log_probs) - inputs[1]) / inputs[0].shape[0],
            gradient * -log_probs / inputs[0].shape[0]
        ]

def gradients(loss, parameters):
    """
    Computes and returns the gradient of the loss with respect to the provided
    parameters.

    Usage: nn.gradients(loss, parameters)
    Inputs:
        loss: a SquareLoss or SoftmaxLoss node
        parameters: a list (or iterable) containing Parameter nodes
    Output: a list of Constant objects, representing the gradient of the loss
        with respect to each provided parameter.
    """

    assert isinstance(loss, (SquareLoss, SoftmaxLoss)), (
        "Loss must be a loss node, instead has type {!r}".format(
            type(loss).__name__))
    assert all(isinstance(parameter, Parameter) for parameter in parameters), (
        "Parameters must all have type {}, instead got types {!r}".format(
            Parameter.__name__,
            tuple(type(parameter).__name__ for parameter in parameters)))
    assert not hasattr(loss, "used"), (
        "Loss node has already been used for backpropagation, cannot reuse")

    loss.used = True

    nodes = set()
    tape = []

    # Build a topological ordering of the compute graph via post-order DFS.
    def visit(node):
        if node not in nodes:
            for parent in node.parents:
                visit(parent)
            nodes.add(node)
            tape.append(node)

    visit(loss)
    nodes |= set(parameters)

    grads = {node: np.zeros_like(node.data) for node in nodes}
    grads[loss] = 1.0

    # Walk the tape in reverse, accumulating each node's gradient into its parents.
    for node in reversed(tape):
        parent_grads = node._backward(
            grads[node], *(parent.data for parent in node.parents))
        for parent, parent_grad in zip(node.parents, parent_grads):
            grads[parent] += parent_grad

    return [Constant(grads[parameter]) for parameter in parameters]
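# Typical use (as in model.py; names here are illustrative): build a loss node
# from a batch, then
#     grad_w, = gradients(loss, [w])
#     w.update(grad_w, -learning_rate)
# Each returned Constant has the same shape as its corresponding Parameter.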
def as_scalar(node):
    """
    Returns the value of a Node as a standard Python number. This only works
    for nodes with one element (e.g. SquareLoss and SoftmaxLoss, as well as
    DotProduct with a batch size of 1 element).
    """

    assert isinstance(node, Node), (
        "Input must be a node object, instead has type {!r}".format(
            type(node).__name__))
    assert node.data.size == 1, (
        "Node has shape {}, cannot convert to a scalar".format(
            format_shape(node.data.shape)))
    return node.data.item()  # np.asscalar is deprecated and removed in NumPy 1.23

--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
import model
import dataset
import numpy as np

def word_to_hash(chars, word):
    word = word.lower()
    chars = list(chars)
    hashed = [chars.index(char) for char in word]  # raises ValueError on unsupported characters
    hashed = hashed[:10]       # words are at most 10 characters long
    while len(hashed) < 10:
        hashed.append(-1)      # -1 marks "no character" (padding)
    return np.array(hashed, dtype=int).reshape(1, 10)

def get_predicted_language(probs):
    languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]
    max_index = 0
    max_val = -float("inf")

    for index in range(len(probs)):
        if probs[index] > max_val:
            max_val = probs[index]
            max_index = index
    return (max_val, languages[max_index])

def main():
    language_classifier = model.LanguageClassificationModel()
    data = dataset.LanguageClassificationDataset(language_classifier)
    chars = data.chars
    language_classifier.train(data)

    test_predicted_probs, test_predicted, test_correct = data._predict('test')
    test_accuracy = np.mean(test_predicted == test_correct)
    print("test set accuracy is: {:%}\n".format(test_accuracy))

    while True:
        word = input("Enter a word (press q to quit): ")

        if word == "q":
            break

        xs = data._encode(word_to_hash(chars, word), None, True)
        result = language_classifier.run(xs)
        probs = data._softmax(result.data)
        max_prob, pred_lang = get_predicted_language(probs[0])
        print("predicted language is: {}, with a confidence of {:%}\n".format(pred_lang, max_prob))

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------