├── .gitignore
├── README.md
├── demo.py
└── mlp.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Created by https://www.gitignore.io/api/python
  3 | # Edit at https://www.gitignore.io/?templates=python
  4 | 
  5 | ### Python ###
  6 | # Byte-compiled / optimized / DLL files
  7 | __pycache__/
  8 | *.py[cod]
  9 | *$py.class
 10 | 
 11 | # C extensions
 12 | *.so
 13 | 
 14 | # Distribution / packaging
 15 | .Python
 16 | build/
 17 | develop-eggs/
 18 | dist/
 19 | downloads/
 20 | eggs/
 21 | .eggs/
 22 | lib/
 23 | lib64/
 24 | parts/
 25 | sdist/
 26 | var/
 27 | wheels/
 28 | pip-wheel-metadata/
 29 | share/python-wheels/
 30 | *.egg-info/
 31 | .installed.cfg
 32 | *.egg
 33 | MANIFEST
 34 | 
 35 | # PyInstaller
 36 | #  Usually these files are written by a python script from a template
 37 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 38 | *.manifest
 39 | *.spec
 40 | 
 41 | # Installer logs
 42 | pip-log.txt
 43 | pip-delete-this-directory.txt
 44 | 
 45 | # Unit test / coverage reports
 46 | htmlcov/
 47 | .tox/
 48 | .nox/
 49 | .coverage
 50 | .coverage.*
 51 | .cache
 52 | nosetests.xml
 53 | coverage.xml
 54 | *.cover
 55 | .hypothesis/
 56 | .pytest_cache/
 57 | 
 58 | # Translations
 59 | *.mo
 60 | *.pot
 61 | 
 62 | # Django stuff:
 63 | *.log
 64 | local_settings.py
 65 | db.sqlite3
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # IPython
 84 | profile_default/
 85 | ipython_config.py
 86 | 
 87 | # pyenv
 88 | .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don’t work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # celery beat schedule file
 98 | celerybeat-schedule
 99 | 
100 | # SageMath parsed files
101 | *.sage.py
102 | 
103 | # Environments
104 | .env
105 | .venv
106 | env/
107 | venv/
108 | ENV/
109 | env.bak/
110 | venv.bak/
111 | 
112 | # Spyder project settings
113 | .spyderproject
114 | .spyproject
115 | 
116 | # Rope project settings
117 | .ropeproject
118 | 
119 | # mkdocs documentation
120 | /site
121 | 
122 | # mypy
123 | .mypy_cache/
124 | .dmypy.json
125 | dmypy.json
126 | 
127 | # Pyre type checker
128 | .pyre/
129 | 
130 | # End of https://www.gitignore.io/api/python
131 | 
132 | 
133 | # Created by https://www.gitignore.io/api/pycharm
134 | # Edit at https://www.gitignore.io/?templates=pycharm
135 | 
136 | ### PyCharm ###
137 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
138 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
139 | 
140 | # User-specific stuff
141 | .idea/
142 | .idea/**/workspace.xml
143 | .idea/**/tasks.xml
144 | .idea/**/usage.statistics.xml
145 | .idea/**/dictionaries
146 | .idea/**/shelf
147 | 
148 | # Generated files
149 | .idea/**/contentModel.xml
150 | 
151 | # Sensitive or high-churn files
152 | .idea/**/dataSources/
153 | .idea/**/dataSources.ids
154 | .idea/**/dataSources.local.xml
155 | .idea/**/sqlDataSources.xml
156 | .idea/**/dynamic.xml
157 | .idea/**/uiDesigner.xml
158 | .idea/**/dbnavigator.xml
159 | 
160 | # Gradle
161 | .idea/**/gradle.xml
162 | .idea/**/libraries
163 | 
164 | # Gradle and Maven with auto-import
165 | # When using Gradle or Maven with auto-import, you should exclude module files,
166 | # since they will be recreated, and may cause churn.  Uncomment if using
167 | # auto-import.
168 | # .idea/modules.xml
169 | # .idea/*.iml
170 | # .idea/modules
171 | 
172 | # CMake
173 | cmake-build-*/
174 | 
175 | # Mongo Explorer plugin
176 | .idea/**/mongoSettings.xml
177 | 
178 | # File-based project format
179 | *.iws
180 | 
181 | # IntelliJ
182 | out/
183 | 
184 | # mpeltonen/sbt-idea plugin
185 | .idea_modules/
186 | 
187 | # JIRA plugin
188 | atlassian-ide-plugin.xml
189 | 
190 | # Cursive Clojure plugin
191 | .idea/replstate.xml
192 | 
193 | # Crashlytics plugin (for Android Studio and IntelliJ)
194 | com_crashlytics_export_strings.xml
195 | crashlytics.properties
196 | crashlytics-build.properties
197 | fabric.properties
198 | 
199 | # Editor-based Rest Client
200 | .idea/httpRequests
201 | 
202 | # Android studio 3.1+ serialized cache file
203 | .idea/caches/build_file_checksums.ser
204 | 
205 | # JetBrains templates
206 | **___jb_tmp___
207 | 
208 | ### PyCharm Patch ###
209 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
210 | 
211 | # *.iml
212 | # modules.xml
213 | # .idea/misc.xml
214 | # *.ipr
215 | 
216 | # Sonarlint plugin
217 | .idea/sonarlint
218 | 
219 | # End of https://www.gitignore.io/api/pycharm
220 | 
221 | *.mlp
222 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Simple MLP
 2 | MLP (Multi-Layer Perceptron) is an ANN (Artificial Neural Network) which has its fundamentals in human brain, where each neuron (here perceptron or node) fires an output depending on the input and its internal weights, and then squashing it through a function which contrains the output range.
 3 | 
 4 | Organizing these neurons in layers permits to determine non-linear relationships between inputs.
 5 | 
 6 | ## Structure of the MLP
 7 | The network starts empty or only input layer if specified.
 8 | 
 9 | Calling `add_layer(number_of_nodes)` allows to add a new layer as the last, optional weigths and bias for this layer can be specified.
10 | 
11 | Calling `save(file_name)` allows to save a file containing the network structure which can be loaded with the static method `Mlp.load(file_name)`
12 | 
13 | The train functions performs a stochastic gradient descent but I'm planning to implement also batch gradient descent.
14 | 
15 | The squashing function used is a sigmoid.
16 | 
17 | ## Dependencies
18 | - Python3
19 | - numpy, for matrix computation
20 | - pandas, for loading external data
21 | - math, for exponential function
22 | 
23 | ## Demo
24 | The demo file contains an example of training the network to perform XOR operations, which is not a linear-separable problem and thus needs a MLP and a second example for classifying hand-written numeric characters provided as 28x28 pixel matrix (786 inputs).
25 | 
26 | ## Performance
27 | For the OCR part the results are:
28 | 
29 | **One hidden layer with 50 nodes**
30 | 
31 | - Training set: **5000** samples - Testing set: **1000** samples
32 |     - Recall: 86% 
33 |     - Precision: 87%
34 |     - **Accuracy:** 87%
35 | 
36 | - Training set: **10000** samples - Testing set: **2000** samples
37 |     - Recall: 89%
38 |     - Precision: 89%
39 |     - **Accuracy:** 89%
40 |     
41 | - Training set: **20000** samples - Testing set: **4000** samples
42 |     - Recall: 91%
43 |     - Precision: 91%
44 |     - **Accuracy:** 91%
45 |     
46 | **Two hidden layer, first 150 nodes, second 50 nodes**
47 | 
48 | - Training set: **50000** samples - Testing set: **5000** samples
49 |     - Recall: 95% 
50 |     - Precision: 95%
51 |     - **Accuracy:** 95%
52 | 
53 | **Two hidden layer, first 150 nodes, second 50 nodes (ReLU squashing function)**
54 | 
55 | - Training set: **50000** samples - Testing set: **5000** samples
56 |     - Recall: 96% 
57 |     - Precision: 96%
58 |     - **Accuracy:** 96%
59 | 
60 | **Two hidden layer, first 300 nodes, second 150 nodes (ReLU squashing function)**
61 | 
62 | - Training set: **100000** samples - Testing set: **10000** samples
63 |     - Recall: 97% 
64 |     - Precision: 97%
65 |     - **Accuracy:** 97%
66 | 
67 | The dataset used is available [here](https://www.kaggle.com/oddrationale/mnist-in-csv)
68 | 
69 | Run the demo by typing in your console:
70 | 
71 | ```python demo.py```
72 | 


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
  1 | from mlp import Mlp
  2 | import random
  3 | import pandas as pd
  4 | import numpy as np
  5 | 
  6 | training_xor = [
  7 |     {
  8 |         "input": [0, 0],
  9 |         "output": [0]
 10 |     },
 11 |     {
 12 |         "input": [1, 0],
 13 |         "output": [1]
 14 |     },
 15 |     {
 16 |         "input": [0, 1],
 17 |         "output": [1]
 18 |     },
 19 |     {
 20 |         "input": [1, 1],
 21 |         "output": [0]
 22 |     }
 23 | ]
 24 | 
 25 | def xor():
 26 |     # Create a MLP with 2 input, a hidden layer with 2 nodes a single output node
 27 |     nn = Mlp(init_nodes=2)
 28 |     nn.add_layer(2)
 29 |     nn.add_layer(1)
 30 | 
 31 |     print("Training the network...")
 32 |     for i in range(20000):
 33 |         data = random.choice(training_xor)
 34 |         nn.train(data["input"], data["output"])
 35 |     # nn.save("xor.mlp")
 36 |     for i in range(2):
 37 |         for j in range(2):
 38 |             out_class, out_prob = nn.predict([i, j])
 39 |             print("Predicting XOR between {} and {} gave {} and the real is {} (Output: {:.2f})"
 40 |                   .format(i, j, out_prob > .5, bool(i) ^ bool(j), out_prob))
 41 | 
 42 | 
 43 | def ocr(training_population=5000, testing_population=1000):
 44 |     print("Loading data...")
 45 |     train = pd.read_csv("../datasets/mnist_train.csv")
 46 |     train = process_df(train)
 47 |     test_set = pd.read_csv("../datasets/mnist_test.csv")
 48 |     test_set = process_df(test_set)
 49 |     print("Loaded {} rows for training.".format(train.shape[0]))
 50 |     print("Loaded {} rows for testing.".format(test_set.shape[0]))
 51 |     nn = Mlp(init_nodes=784, learning_rate=.05)
 52 |     nn.add_layer(300)
 53 |     nn.add_layer(150, function="relu")
 54 |     nn.add_layer(10)
 55 | 
 56 |     print("Training the network with {} samples...".format(training_population))
 57 |     for i in range(training_population):
 58 |         data = train.sample(n=1)
 59 |         label = data["label"].tolist()[0]
 60 |         inputs = list(data.iloc[0, 1:])
 61 |         outputs = [0] * 10
 62 |         outputs[label] = 1
 63 |         nn.train(inputs, outputs)
 64 | 
 65 |     print("Trained successfully.")
 66 |     # nn.save("ocr.mlp")
 67 |     print("Testing with {} samples...".format(testing_population))
 68 |     c_m = np.zeros(shape=(10, 10))
 69 |     for i in range(testing_population):
 70 |         data = test_set.sample(n=1)
 71 |         inputs = list(data.iloc[0, 1:])
 72 |         label = data["label"].tolist()[0]
 73 |         out_class, out_prob = nn.predict(inputs)
 74 |         c_m[label][out_class] += 1
 75 | 
 76 |     print("Results:")
 77 | 
 78 |     correct_guesses = np.sum(np.diagonal(c_m))
 79 |     total_guesses = c_m.sum()
 80 |     accuracy = correct_guesses / total_guesses
 81 | 
 82 |     recall = 0
 83 |     precision = 0
 84 |     c_m_t = c_m.T
 85 | 
 86 |     for i in range(10):
 87 |         correct_guesses = c_m[i][i]
 88 |         total_row = np.sum(c_m[i])
 89 |         total_col = np.sum(c_m_t[i])
 90 |         recall += (correct_guesses / total_row) if total_row > 0 else 0
 91 |         precision += (correct_guesses / total_col) if total_col > 0 else 0
 92 |     
 93 |     recall = recall / 10
 94 |     precision = precision / 10
 95 | 
 96 |     print("\tRecall: {0:.2f}\n\tPrecision: {0:.2f}\n\tAccuracy: {0:.2f}".format(recall, precision, accuracy))
 97 | 
 98 | 
 99 | def filter_pixel(x):
100 |     return x / 255
101 | 
102 | 
103 | def process_df(df):
104 |     labels = df["label"]
105 |     df = df.drop(["label"], axis=1)
106 |     df = df.apply(np.vectorize(filter_pixel))
107 |     df = pd.concat([labels, df], axis=1)
108 |     return df
109 | 
110 | ocr(training_population=50000, testing_population=5000)
111 | # xor()
112 | 


--------------------------------------------------------------------------------
/mlp.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import math
  3 | import operator
  4 | import re
  5 | 
  6 | class Mlp:
  7 |     def __init__(self, init_nodes: int = 0, learning_rate: float = .2) -> None:
  8 |         self.number_of_nodes = []
  9 |         if init_nodes > 0:
 10 |             self.number_of_nodes.append(init_nodes)
 11 |         self.weights = []
 12 |         self.biases = []
 13 |         self.functions = []
 14 |         self.learning_rate = learning_rate
 15 | 
 16 |     def add_layer(self, number_of_nodes: int, weights = None, bias = None, function="sigmoid"):
 17 |         self.number_of_nodes.append(number_of_nodes)
 18 |         if not weights is None:
 19 |             self.weights.append(weights)
 20 |             self.functions.append(function)
 21 |         elif len(self.number_of_nodes) > 1:
 22 |             self.weights.append(np.random.randn(self.number_of_nodes[-1], self.number_of_nodes[-2]) * np.sqrt(2 / (self.number_of_nodes[-1] + self.number_of_nodes[-2])))
 23 |             self.functions.append(function)
 24 | 
 25 |         if not bias is None:
 26 |             self.biases.append(bias)
 27 |         elif len(self.number_of_nodes) > 1:
 28 |             self.biases.append(np.random.uniform(0, 0, size=(number_of_nodes, 1)))
 29 |     
 30 |     def save(self, location):
 31 |         f = open(location, "w+")
 32 |         for i in self.number_of_nodes:
 33 |             f.write(str(i) + " ")
 34 |         f.write("\t")
 35 |         for i in self.functions:
 36 |             f.write(i + " ")
 37 |         f.write("\n")
 38 |         for i in self.weights:
 39 |             for j in i:
 40 |                 for k in j:
 41 |                     f.write(str(k) + " ")
 42 |                 f.write("\t")
 43 |             f.write("\n")
 44 |         for b in self.biases:
 45 |             for i in b:
 46 |                 for k in i:
 47 |                     f.write(str(k) + " ")
 48 |             f.write("\n")
 49 |         f.close()
 50 | 
 51 |     @staticmethod
 52 |     def load(location):
 53 |         f = open(location, "r")
 54 |         lines = f.readlines()
 55 |         f_l = lines[0].strip()
 56 |         f_l = re.split(r'\t+', f_l)
 57 |         number_of_nodes = np.vectorize(lambda x: int(x))( f_l[0].split() )
 58 |         functions = f_l[1].split()
 59 |         weigths = []
 60 |         for i in range(1, len(number_of_nodes)):
 61 |             m = lines[i].strip().split("\t")
 62 |             for j in range(len(m)):
 63 |                 m[j] = m[j].split()
 64 |             m = np.vectorize(lambda x: float(x))(np.matrix(m))
 65 |             weigths.append(m)
 66 |         biases = []
 67 |         for i in range(len(number_of_nodes), len(lines)):
 68 |             b = lines[i].strip().split("\t")
 69 |             for j in range(len(b)):
 70 |                 b[j] = b[j].split()
 71 |             b = np.vectorize(lambda x: float(x))(np.matrix(b).T)
 72 |             biases.append(b)
 73 |         nn = Mlp()
 74 |         for i in range(len(number_of_nodes)):
 75 |             if i > 0:
 76 |                 nn.add_layer(number_of_nodes=number_of_nodes[i], weights=weigths[i-1], bias=biases[i-1], function=functions[i-1])
 77 |             else:
 78 |                 nn.add_layer(number_of_nodes[i])
 79 |         return nn
 80 | 
 81 |     @staticmethod
 82 |     def soft_plus(x):
 83 |         sp = np.vectorize(lambda y: math.log(1 + math.exp(y)))
 84 |         return sp(x)
 85 | 
 86 |     @staticmethod
 87 |     def relu(x):
 88 |         re = np.vectorize(lambda y: max(0, y))
 89 |         return re(x)
 90 | 
 91 |     @staticmethod
 92 |     def sigmoid(x):
 93 |         sig = np.vectorize(lambda y:  (1 - 1 / (1 + math.exp(y))) if y < 0 else  (1 / (1 + math.exp(-y))))
 94 |         return sig(x)
 95 |     
 96 |     @staticmethod
 97 |     def squash(x, function):
 98 |         if function == "sigmoid":
 99 |             return Mlp.sigmoid(x)
100 |         elif function == "soft_plus":
101 |             return Mlp.soft_plus(x)
102 |         elif function == "relu":
103 |             return Mlp.relu(x)
104 | 
105 |     @staticmethod
106 |     def derivative(x, function):
107 |         if function == "sigmoid":
108 |             return np.multiply(x, (1-x))
109 |         elif function == "soft_plus":
110 |             return Mlp.sigmoid(x)
111 |         elif function == "relu":
112 |             d_relu = np.vectorize(lambda y: 1 if y > 0 else 0)
113 |             return d_relu(x)
114 | 
115 |     def feed_forward(self, inp):
116 |         outputs = [np.matrix(inp).T]
117 | 
118 |         for i in range(len(self.number_of_nodes) - 1):
119 |             outputs.append(Mlp.squash((np.dot(self.weights[i], outputs[-1]) + self.biases[i]), self.functions[i]))
120 | 
121 |         return outputs
122 | 
123 |     # This training function uses stochastic gradient descent instead of batch
124 |     def train(self, inp, targets):
125 |         targets = np.matrix(targets).T
126 |         # Calculate output with given input
127 |         outputs = self.feed_forward(inp)
128 | 
129 |         # Calculate each layer error
130 |         errors = [np.subtract(targets, outputs[-1])]
131 |         for i in range(len(self.weights) - 1):
132 |             errors.insert(0, np.dot(self.weights[-1-i].T, errors[0]))
133 | 
134 |         for i in range(len(self.weights)):
135 |             # Calculate gradient and weight correction
136 |             gradient = np.multiply(errors[-1-i], Mlp.derivative(outputs[-1-i], self.functions[-1-i]))
137 |             gradient *= self.learning_rate
138 |             self.biases[-1-i] += gradient
139 |             delta_w  = np.dot(gradient, outputs[-2-i].T)
140 |             self.weights[-1-i] += delta_w
141 | 
142 |     def predict(self, inp):
143 |         output = self.feed_forward(inp)[-1]
144 |         output = dict(enumerate(output.A1))
145 |         out_class = max(output.items(), key=operator.itemgetter(1))[0]
146 |         out_prob = output[out_class]
147 |         
148 |         return out_class, out_prob
149 | 


--------------------------------------------------------------------------------