├── .gitignore ├── README.md ├── data ├── t10k-images-idx3-ubyte ├── t10k-labels-idx1-ubyte ├── train-images-idx3-ubyte └── train-labels-idx1-ubyte ├── mnist.php └── src ├── Dataset.php ├── DatasetReader.php └── NeuralNetwork.php /.gitignore: -------------------------------------------------------------------------------- 1 | /.vscode/ 2 | /mnist 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MNIST Neural Network in PHP 2 | 3 | 4 | This source code seeks to replicate the (now removed) [MNIST For ML Beginners](https://web.archive.org/web/20180801165522/https://www.tensorflow.org/versions/r1.1/get_started/mnist/beginners) tutorial from the Tensorflow website using straight forward PHP code. Hopefully, this example will make that tutorial a bit more manageable for PHP developers. 5 | 6 | The task is to recognise digits, such as the ones below, as accurately as possible. 7 | 8 | ![MNIST digits](https://web.archive.org/web/20180801165522im_/https://www.tensorflow.org/versions/r1.1/images/MNIST.png) 9 | 10 | By [AndrewCarterUK ![(Twitter)](http://i.imgur.com/wWzX9uB.png)](https://twitter.com/AndrewCarterUK) 11 | 12 | ## Contents 13 | 14 | - [mnist.php](mnist.php): Glue code that runs the algorithm steps and reports algorithm accuracy 15 | - [Dataset.php](src/Dataset.php): Dataset container object 16 | - [DatasetReader.php](src/DatasetReader.php): Retrieves images and labels from the MNIST dataset 17 | - [NeuralNetwork.php](src/NeuralNetwork.php): Implements training and prediction routines for a simple neural network 18 | 19 | ## Usage 20 | 21 | ```sh 22 | php mnist.php 23 | ``` 24 | 25 | ## Description 26 | 27 | The neural network implemented has one output layer and no hidden layers. Softmax activation is used, and this ensures that the output activations form a probability vector corresponding to each label. 
The cross entropy is used as a loss function. 28 | 29 | This algorithm can achieve an accuracy of around 92% (with a batch size of 100 and 1000 training steps). That said, you are likely to get bored well before that point with PHP. 30 | 31 | ## Expected Output 32 | 33 | ``` 34 | Loading training dataset... (may take a while) 35 | Loading test dataset... (may take a while) 36 | Starting training... 37 | Step 0001 Average Loss 4.12 Accuracy: 0.19 38 | Step 0002 Average Loss 3.21 Accuracy: 0.23 39 | Step 0003 Average Loss 2.59 Accuracy: 0.32 40 | Step 0004 Average Loss 2.43 Accuracy: 0.36 41 | Step 0005 Average Loss 1.87 Accuracy: 0.45 42 | Step 0006 Average Loss 2.06 Accuracy: 0.47 43 | Step 0007 Average Loss 1.67 Accuracy: 0.51 44 | Step 0008 Average Loss 1.81 Accuracy: 0.46 45 | Step 0009 Average Loss 1.74 Accuracy: 0.55 46 | Step 0010 Average Loss 1.24 Accuracy: 0.56 47 | ... 48 | ``` 49 | 50 | ![training evolution](https://res.cloudinary.com/andrewcarteruk/image/upload/v1523189356/training-evolution_hhbsfb.png) 51 | -------------------------------------------------------------------------------- /data/t10k-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndrewCarterUK/mnist-neural-network-plain-php/44600a25c03266ce430d02f4df0b14221ea878cc/data/t10k-images-idx3-ubyte -------------------------------------------------------------------------------- /data/t10k-labels-idx1-ubyte: -------------------------------------------------------------------------------- 1 | '                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  -------------------------------------------------------------------------------- /data/train-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndrewCarterUK/mnist-neural-network-plain-php/44600a25c03266ce430d02f4df0b14221ea878cc/data/train-images-idx3-ubyte -------------------------------------------------------------------------------- /data/train-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndrewCarterUK/mnist-neural-network-plain-php/44600a25c03266ce430d02f4df0b14221ea878cc/data/train-labels-idx1-ubyte -------------------------------------------------------------------------------- /mnist.php: -------------------------------------------------------------------------------- 1 | getSize(); 32 | 33 | // Loop through all the training examples 34 | for ($i = 0, $correct = 0; $i < $size; $i++) { 35 | $image = $dataset->getImage($i); 36 | $label = $dataset->getLabel($i); 37 | 38 | $activations = $neuralNetwork->hypothesis($image); 39 | 40 | // Our prediction is index containing the maximum probability 41 | $prediction = array_search(max($activations), $activations); 42 | 43 | if ($prediction == $label) { 44 | $correct++; 45 | } 46 | } 47 | 48 | // Percentage of correct predictions is the accuracy 49 | return $correct / $size; 50 | } 51 | 52 | // Create Network 53 | $neuralNetwork = new NeuralNetwork(); 54 | 55 | // Begin 
$batches = $trainDataset->getSize() / $BATCH_SIZE;

echo 'Starting training...' . PHP_EOL;

for ($i = 0; $i < $STEPS; $i++) {
    // Cycle through the training set batch by batch (integer modulo wraps around)
    $batch = $trainDataset->getBatch($BATCH_SIZE, $i % $batches);

    $loss = $neuralNetwork->trainingStep($batch, 0.5);
    $averageLoss = $loss / $batch->getSize();

    // Accuracy is measured against the held-out test set, never the training batch
    $accuracy = calculate_accuracy($neuralNetwork, $testDataset);

    printf("Step %04d\tAverage Loss %.2f\tAccuracy: %.2f\n", $i + 1, $averageLoss, $accuracy);
}

/**
 * Container for a set of MNIST images and their labels.
 *
 * Images are arrays of pixel activations; labels are the digits 0-9.
 */
class Dataset
{
    // NOTE(review): the original class header and constants were stripped by the
    // dump. LABELS and IMAGE_SIZE are referenced by NeuralNetwork; for MNIST the
    // values are presumably 10 digits and 28x28 = 784 pixels — confirm against
    // the upstream repository.
    const LABELS = 10;
    const IMAGE_SIZE = 784;

    private $images;
    private $labels;
    private $size;

    /**
     * @param array $images One pixel-array per example.
     * @param array $labels One integer label per example, same order as $images.
     */
    public function __construct(array $images, array $labels)
    {
        $this->images = $images;
        $this->labels = $labels;
        $this->size = count($images);
    }

    /**
     * Pixel data for the example at $index.
     */
    public function getImage($index): array
    {
        return $this->images[$index];
    }

    /**
     * Digit label for the example at $index.
     */
    public function getLabel($index): int
    {
        return $this->labels[$index];
    }

    /**
     * Number of examples in this dataset.
     */
    public function getSize(): int
    {
        return $this->size;
    }

    /**
     * Retrieve a subset of the dataset as a batch.
     *
     * Batch $number selects examples [$size * $number, $size * ($number + 1)).
     */
    public function getBatch(int $size, int $number): Dataset
    {
        $offset = $size * $number;

        return new self(
            array_slice($this->images, $offset, $size),
            array_slice($this->labels, $offset, $size)
        );
    }
}

/**
 * A single-layer neural network (no hidden layers) with softmax output,
 * trained with gradient descent on a cross-entropy loss.
 */
class NeuralNetwork
{
    private $b; // bias vector, one entry per label
    private $W; // weight matrix, one row per label, one column per pixel

    /**
     * Initialise the bias vector and weight matrix with random values in
     * (0, 1].
     */
    public function __construct()
    {
        $this->b = [];
        $this->W = [];

        for ($i = 0; $i < Dataset::LABELS; $i++) {
            $this->b[$i] = random_int(1, 1000) / 1000;
            $this->W[$i] = [];

            for ($j = 0; $j < Dataset::IMAGE_SIZE; $j++) {
                $this->W[$i][$j] = random_int(1, 1000) / 1000;
            }
        }
    }
32 | */ 33 | private function softmax(array $activations): array 34 | { 35 | // Normalising with the max activation makes the computation more numerically stable 36 | $max = max($activations); 37 | 38 | $activations = array_map(function ($a) use ($max) { 39 | return exp($a - $max); 40 | }, $activations); 41 | 42 | $sum = array_sum($activations); 43 | 44 | return array_map(function ($a) use ($sum) { 45 | return $a / $sum; 46 | }, $activations); 47 | } 48 | 49 | /** 50 | * Forward propagate through the neural network to calculate the activation 51 | * vector for an image. 52 | */ 53 | public function hypothesis(array $image): array 54 | { 55 | $activations = []; 56 | 57 | // Computes: Wx + b 58 | for ($i = 0; $i < Dataset::LABELS; $i++) { 59 | $activations[$i] = $this->b[$i]; 60 | 61 | for ($j = 0; $j < Dataset::IMAGE_SIZE; $j++) { 62 | $activations[$i] += $this->W[$i][$j] * $image[$j]; 63 | } 64 | } 65 | 66 | return $this->softmax($activations); 67 | } 68 | 69 | /** 70 | * Calculate the gradient adjustments on a single training example (image) 71 | * from the dataset. 72 | * 73 | * Returns the contribution to the loss value from this example. 74 | */ 75 | private function gradientUpdate(array $image, array &$bGrad, array &$WGrad, int $label): float 76 | { 77 | $activations = $this->hypothesis($image); 78 | 79 | for ($i = 0; $i < Dataset::LABELS; $i++) { 80 | // Uses the derivative of the softmax function 81 | $bGradPart = ($i === $label) ? $activations[$i] - 1 : $activations[$i]; 82 | 83 | for ($j = 0; $j < Dataset::IMAGE_SIZE; $j++) { 84 | // Gradient is the product of the bias gradient and the input activation 85 | $WGrad[$i][$j] += $bGradPart * $image[$j]; 86 | } 87 | 88 | $bGrad[$i] += $bGradPart; 89 | } 90 | 91 | // Cross entropy 92 | return 0 - log($activations[$label]); 93 | } 94 | 95 | /** 96 | * Perform one step of gradient descent on the neural network using the 97 | * provided dataset. 
98 | * 99 | * Returns the total loss for the network on the provided dataset. 100 | */ 101 | public function trainingStep(Dataset $dataset, float $learningRate): float 102 | { 103 | // Zero init the gradients 104 | $bGrad = array_fill(0, Dataset::LABELS, 0); 105 | $WGrad = array_fill(0, Dataset::LABELS, array_fill(0, Dataset::IMAGE_SIZE, 0)); 106 | 107 | $totalLoss = 0; 108 | $size = $dataset->getSize(); 109 | 110 | // Calculate the gradients and loss 111 | for ($i = 0; $i < $size; $i++) { 112 | $totalLoss += $this->gradientUpdate($dataset->getImage($i), $bGrad, $WGrad, $dataset->getLabel($i)); 113 | } 114 | 115 | // Adjust the weights and bias vector using the gradient and the learning rate 116 | for ($i = 0; $i < Dataset::LABELS; $i++) { 117 | $this->b[$i] -= $learningRate * $bGrad[$i] / $size; 118 | 119 | for ($j = 0; $j < Dataset::IMAGE_SIZE; $j++) { 120 | $this->W[$i][$j] -= $learningRate * $WGrad[$i][$j] / $size; 121 | } 122 | } 123 | 124 | return $totalLoss; 125 | } 126 | } 127 | --------------------------------------------------------------------------------