├── .gitignore ├── README.md ├── demo.py └── mlp.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python 3 | # Edit at https://www.gitignore.io/?templates=python 4 | 5 | ### Python ### 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # celery beat schedule file 98 | celerybeat-schedule 99 | 100 | # SageMath parsed files 101 | *.sage.py 102 | 103 | # Environments 104 | .env 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | .dmypy.json 125 | dmypy.json 126 | 127 | # Pyre type checker 128 | .pyre/ 129 | 130 | # End of https://www.gitignore.io/api/python 131 | 132 | 133 | # Created by https://www.gitignore.io/api/pycharm 134 | # Edit at https://www.gitignore.io/?templates=pycharm 135 | 136 | ### PyCharm ### 137 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 138 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 139 | 140 | # User-specific stuff 141 | .idea/ 142 | .idea/**/workspace.xml 143 | .idea/**/tasks.xml 144 | .idea/**/usage.statistics.xml 145 | .idea/**/dictionaries 146 | .idea/**/shelf 147 | 148 | # Generated files 149 | .idea/**/contentModel.xml 150 | 151 | # Sensitive or high-churn files 152 | .idea/**/dataSources/ 153 | .idea/**/dataSources.ids 154 | .idea/**/dataSources.local.xml 155 | .idea/**/sqlDataSources.xml 156 | .idea/**/dynamic.xml 157 | .idea/**/uiDesigner.xml 158 | .idea/**/dbnavigator.xml 159 | 160 | # Gradle 161 | .idea/**/gradle.xml 162 | .idea/**/libraries 163 | 164 | # Gradle and Maven with auto-import 165 | # When using Gradle or Maven with auto-import, you should exclude module files, 166 | # since they will be recreated, and may cause churn. Uncomment if using 167 | # auto-import. 168 | # .idea/modules.xml 169 | # .idea/*.iml 170 | # .idea/modules 171 | 172 | # CMake 173 | cmake-build-*/ 174 | 175 | # Mongo Explorer plugin 176 | .idea/**/mongoSettings.xml 177 | 178 | # File-based project format 179 | *.iws 180 | 181 | # IntelliJ 182 | out/ 183 | 184 | # mpeltonen/sbt-idea plugin 185 | .idea_modules/ 186 | 187 | # JIRA plugin 188 | atlassian-ide-plugin.xml 189 | 190 | # Cursive Clojure plugin 191 | .idea/replstate.xml 192 | 193 | # Crashlytics plugin (for Android Studio and IntelliJ) 194 | com_crashlytics_export_strings.xml 195 | crashlytics.properties 196 | crashlytics-build.properties 197 | fabric.properties 198 | 199 | # Editor-based Rest Client 200 | .idea/httpRequests 201 | 202 | # Android studio 3.1+ serialized cache file 203 | .idea/caches/build_file_checksums.ser 204 | 205 | # JetBrains templates 206 | **___jb_tmp___ 207 | 208 | ### PyCharm Patch ### 209 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 210 | 211 | # *.iml 212 | # modules.xml 213 | # .idea/misc.xml 214 | # *.ipr 215 | 216 | # Sonarlint plugin 217 | .idea/sonarlint 218 | 219 | # End of https://www.gitignore.io/api/pycharm 220 | 221 | *.mlp 222 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Simple MLP 2 | MLP (Multi-Layer Perceptron) is an ANN (Artificial Neural Network) which has its fundamentals in human brain, where each neuron (here perceptron or node) fires an output depending on the input and its internal weights, and then squashing it through a function which contrains the output range. 3 | 4 | Organizing these neurons in layers permits to determine non-linear relationships between inputs. 5 | 6 | ## Structure of the MLP 7 | The network starts empty or only input layer if specified. 8 | 9 | Calling `add_layer(number_of_nodes)` allows to add a new layer as the last, optional weigths and bias for this layer can be specified. 10 | 11 | Calling `save(file_name)` allows to save a file containing the network structure which can be loaded with the static method `Mlp.load(file_name)` 12 | 13 | The train functions performs a stochastic gradient descent but I'm planning to implement also batch gradient descent. 14 | 15 | The squashing function used is a sigmoid. 16 | 17 | ## Dependencies 18 | - Python3 19 | - numpy, for matrix computation 20 | - pandas, for loading external data 21 | - math, for exponential function 22 | 23 | ## Demo 24 | The demo file contains an example of training the network to perform XOR operations, which is not a linear-separable problem and thus needs a MLP and a second example for classifying hand-written numeric characters provided as 28x28 pixel matrix (786 inputs). 25 | 26 | ## Performance 27 | For the OCR part the results are: 28 | 29 | **One hidden layer with 50 nodes** 30 | 31 | - Training set: **5000** samples - Testing set: **1000** samples 32 | - Recall: 86% 33 | - Precision: 87% 34 | - **Accuracy:** 87% 35 | 36 | - Training set: **10000** samples - Testing set: **2000** samples 37 | - Recall: 89% 38 | - Precision: 89% 39 | - **Accuracy:** 89% 40 | 41 | - Training set: **20000** samples - Testing set: **4000** samples 42 | - Recall: 91% 43 | - Precision: 91% 44 | - **Accuracy:** 91% 45 | 46 | **Two hidden layer, first 150 nodes, second 50 nodes** 47 | 48 | - Training set: **50000** samples - Testing set: **5000** samples 49 | - Recall: 95% 50 | - Precision: 95% 51 | - **Accuracy:** 95% 52 | 53 | **Two hidden layer, first 150 nodes, second 50 nodes (ReLU squashing function)** 54 | 55 | - Training set: **50000** samples - Testing set: **5000** samples 56 | - Recall: 96% 57 | - Precision: 96% 58 | - **Accuracy:** 96% 59 | 60 | **Two hidden layer, first 300 nodes, second 150 nodes (ReLU squashing function)** 61 | 62 | - Training set: **100000** samples - Testing set: **10000** samples 63 | - Recall: 97% 64 | - Precision: 97% 65 | - **Accuracy:** 97% 66 | 67 | The dataset used is available [here](https://www.kaggle.com/oddrationale/mnist-in-csv) 68 | 69 | Run the demo by typing in your console: 70 | 71 | ```python demo.py``` 72 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | from mlp import Mlp 2 | import random 3 | import pandas as pd 4 | import numpy as np 5 | 6 | training_xor = [ 7 | { 8 | "input": [0, 0], 9 | "output": [0] 10 | }, 11 | { 12 | "input": [1, 0], 13 | "output": [1] 14 | }, 15 | { 16 | "input": [0, 1], 17 | "output": [1] 18 | }, 19 | { 20 | "input": [1, 1], 21 | "output": [0] 22 | } 23 | ] 24 | 25 | def xor(): 26 | # Create a MLP with 2 input, a hidden layer with 2 nodes a single output node 27 | nn = Mlp(init_nodes=2) 28 | nn.add_layer(2) 29 | nn.add_layer(1) 30 | 31 | print("Training the network...") 32 | for i in range(20000): 33 | data = random.choice(training_xor) 34 | nn.train(data["input"], data["output"]) 35 | # nn.save("xor.mlp") 36 | for i in range(2): 37 | for j in range(2): 38 | out_class, out_prob = nn.predict([i, j]) 39 | print("Predicting XOR between {} and {} gave {} and the real is {} (Output: {:.2f})" 40 | .format(i, j, out_prob > .5, bool(i) ^ bool(j), out_prob)) 41 | 42 | 43 | def ocr(training_population=5000, testing_population=1000): 44 | print("Loading data...") 45 | train = pd.read_csv("../datasets/mnist_train.csv") 46 | train = process_df(train) 47 | test_set = pd.read_csv("../datasets/mnist_test.csv") 48 | test_set = process_df(test_set) 49 | print("Loaded {} rows for training.".format(train.shape[0])) 50 | print("Loaded {} rows for testing.".format(test_set.shape[0])) 51 | nn = Mlp(init_nodes=784, learning_rate=.05) 52 | nn.add_layer(300) 53 | nn.add_layer(150, function="relu") 54 | nn.add_layer(10) 55 | 56 | print("Training the network with {} samples...".format(training_population)) 57 | for i in range(training_population): 58 | data = train.sample(n=1) 59 | label = data["label"].tolist()[0] 60 | inputs = list(data.iloc[0, 1:]) 61 | outputs = [0] * 10 62 | outputs[label] = 1 63 | nn.train(inputs, outputs) 64 | 65 | print("Trained successfully.") 66 | # nn.save("ocr.mlp") 67 | print("Testing with {} samples...".format(testing_population)) 68 | c_m = np.zeros(shape=(10, 10)) 69 | for i in range(testing_population): 70 | data = test_set.sample(n=1) 71 | inputs = list(data.iloc[0, 1:]) 72 | label = data["label"].tolist()[0] 73 | out_class, out_prob = nn.predict(inputs) 74 | c_m[label][out_class] += 1 75 | 76 | print("Results:") 77 | 78 | correct_guesses = np.sum(np.diagonal(c_m)) 79 | total_guesses = c_m.sum() 80 | accuracy = correct_guesses / total_guesses 81 | 82 | recall = 0 83 | precision = 0 84 | c_m_t = c_m.T 85 | 86 | for i in range(10): 87 | correct_guesses = c_m[i][i] 88 | total_row = np.sum(c_m[i]) 89 | total_col = np.sum(c_m_t[i]) 90 | recall += (correct_guesses / total_row) if total_row > 0 else 0 91 | precision += (correct_guesses / total_col) if total_col > 0 else 0 92 | 93 | recall = recall / 10 94 | precision = precision / 10 95 | 96 | print("\tRecall: {0:.2f}\n\tPrecision: {0:.2f}\n\tAccuracy: {0:.2f}".format(recall, precision, accuracy)) 97 | 98 | 99 | def filter_pixel(x): 100 | return x / 255 101 | 102 | 103 | def process_df(df): 104 | labels = df["label"] 105 | df = df.drop(["label"], axis=1) 106 | df = df.apply(np.vectorize(filter_pixel)) 107 | df = pd.concat([labels, df], axis=1) 108 | return df 109 | 110 | ocr(training_population=50000, testing_population=5000) 111 | # xor() 112 | -------------------------------------------------------------------------------- /mlp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import operator 4 | import re 5 | 6 | class Mlp: 7 | def __init__(self, init_nodes: int = 0, learning_rate: float = .2) -> None: 8 | self.number_of_nodes = [] 9 | if init_nodes > 0: 10 | self.number_of_nodes.append(init_nodes) 11 | self.weights = [] 12 | self.biases = [] 13 | self.functions = [] 14 | self.learning_rate = learning_rate 15 | 16 | def add_layer(self, number_of_nodes: int, weights = None, bias = None, function="sigmoid"): 17 | self.number_of_nodes.append(number_of_nodes) 18 | if not weights is None: 19 | self.weights.append(weights) 20 | self.functions.append(function) 21 | elif len(self.number_of_nodes) > 1: 22 | self.weights.append(np.random.randn(self.number_of_nodes[-1], self.number_of_nodes[-2]) * np.sqrt(2 / (self.number_of_nodes[-1] + self.number_of_nodes[-2]))) 23 | self.functions.append(function) 24 | 25 | if not bias is None: 26 | self.biases.append(bias) 27 | elif len(self.number_of_nodes) > 1: 28 | self.biases.append(np.random.uniform(0, 0, size=(number_of_nodes, 1))) 29 | 30 | def save(self, location): 31 | f = open(location, "w+") 32 | for i in self.number_of_nodes: 33 | f.write(str(i) + " ") 34 | f.write("\t") 35 | for i in self.functions: 36 | f.write(i + " ") 37 | f.write("\n") 38 | for i in self.weights: 39 | for j in i: 40 | for k in j: 41 | f.write(str(k) + " ") 42 | f.write("\t") 43 | f.write("\n") 44 | for b in self.biases: 45 | for i in b: 46 | for k in i: 47 | f.write(str(k) + " ") 48 | f.write("\n") 49 | f.close() 50 | 51 | @staticmethod 52 | def load(location): 53 | f = open(location, "r") 54 | lines = f.readlines() 55 | f_l = lines[0].strip() 56 | f_l = re.split(r'\t+', f_l) 57 | number_of_nodes = np.vectorize(lambda x: int(x))( f_l[0].split() ) 58 | functions = f_l[1].split() 59 | weigths = [] 60 | for i in range(1, len(number_of_nodes)): 61 | m = lines[i].strip().split("\t") 62 | for j in range(len(m)): 63 | m[j] = m[j].split() 64 | m = np.vectorize(lambda x: float(x))(np.matrix(m)) 65 | weigths.append(m) 66 | biases = [] 67 | for i in range(len(number_of_nodes), len(lines)): 68 | b = lines[i].strip().split("\t") 69 | for j in range(len(b)): 70 | b[j] = b[j].split() 71 | b = np.vectorize(lambda x: float(x))(np.matrix(b).T) 72 | biases.append(b) 73 | nn = Mlp() 74 | for i in range(len(number_of_nodes)): 75 | if i > 0: 76 | nn.add_layer(number_of_nodes=number_of_nodes[i], weights=weigths[i-1], bias=biases[i-1], function=functions[i-1]) 77 | else: 78 | nn.add_layer(number_of_nodes[i]) 79 | return nn 80 | 81 | @staticmethod 82 | def soft_plus(x): 83 | sp = np.vectorize(lambda y: math.log(1 + math.exp(y))) 84 | return sp(x) 85 | 86 | @staticmethod 87 | def relu(x): 88 | re = np.vectorize(lambda y: max(0, y)) 89 | return re(x) 90 | 91 | @staticmethod 92 | def sigmoid(x): 93 | sig = np.vectorize(lambda y: (1 - 1 / (1 + math.exp(y))) if y < 0 else (1 / (1 + math.exp(-y)))) 94 | return sig(x) 95 | 96 | @staticmethod 97 | def squash(x, function): 98 | if function == "sigmoid": 99 | return Mlp.sigmoid(x) 100 | elif function == "soft_plus": 101 | return Mlp.soft_plus(x) 102 | elif function == "relu": 103 | return Mlp.relu(x) 104 | 105 | @staticmethod 106 | def derivative(x, function): 107 | if function == "sigmoid": 108 | return np.multiply(x, (1-x)) 109 | elif function == "soft_plus": 110 | return Mlp.sigmoid(x) 111 | elif function == "relu": 112 | d_relu = np.vectorize(lambda y: 1 if y > 0 else 0) 113 | return d_relu(x) 114 | 115 | def feed_forward(self, inp): 116 | outputs = [np.matrix(inp).T] 117 | 118 | for i in range(len(self.number_of_nodes) - 1): 119 | outputs.append(Mlp.squash((np.dot(self.weights[i], outputs[-1]) + self.biases[i]), self.functions[i])) 120 | 121 | return outputs 122 | 123 | # This training function uses stochastic gradient descent instead of batch 124 | def train(self, inp, targets): 125 | targets = np.matrix(targets).T 126 | # Calculate output with given input 127 | outputs = self.feed_forward(inp) 128 | 129 | # Calculate each layer error 130 | errors = [np.subtract(targets, outputs[-1])] 131 | for i in range(len(self.weights) - 1): 132 | errors.insert(0, np.dot(self.weights[-1-i].T, errors[0])) 133 | 134 | for i in range(len(self.weights)): 135 | # Calculate gradient and weight correction 136 | gradient = np.multiply(errors[-1-i], Mlp.derivative(outputs[-1-i], self.functions[-1-i])) 137 | gradient *= self.learning_rate 138 | self.biases[-1-i] += gradient 139 | delta_w = np.dot(gradient, outputs[-2-i].T) 140 | self.weights[-1-i] += delta_w 141 | 142 | def predict(self, inp): 143 | output = self.feed_forward(inp)[-1] 144 | output = dict(enumerate(output.A1)) 145 | out_class = max(output.items(), key=operator.itemgetter(1))[0] 146 | out_prob = output[out_class] 147 | 148 | return out_class, out_prob 149 | --------------------------------------------------------------------------------