├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── dataset └── mnist │ └── mnist.go ├── dbn └── dbn.go ├── examples ├── data │ ├── t10k-images-idx3-ubyte │ ├── t10k-labels-idx1-ubyte │ ├── train-images-idx3-ubyte │ └── train-labels-idx1-ubyte ├── mlp3 │ └── mlp3_mnist.go └── rbm │ ├── rbm_mnist.go │ └── visualize.py ├── gbrbm └── gbrbm.go ├── mlp ├── hidden_layer.go ├── mlp.go └── mlp_test.go ├── mlp3 ├── mlp3.go └── mlp3_test.go ├── nnet.go ├── rbm ├── rbm.go └── rbm_test.go └── train.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by http://www.gitignore.io 2 | 3 | ### Go ### 4 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 5 | *.o 6 | *.a 7 | *.so 8 | 9 | # Folders 10 | _obj 11 | _test 12 | 13 | # Architecture specific extensions/prefixes 14 | *.[568vq] 15 | [568vq].out 16 | 17 | *.cgo1.go 18 | *.cgo2.c 19 | _cgo_defun.c 20 | _cgo_gotypes.go 21 | _cgo_export.* 22 | 23 | _testmain.go 24 | 25 | *.exe 26 | *.test 27 | 28 | 29 | ### Emacs ### 30 | # -*- mode: gitignore; -*- 31 | *~ 32 | \#*\# 33 | /.emacs.desktop 34 | /.emacs.desktop.lock 35 | *.elc 36 | auto-save-list 37 | tramp 38 | .\#* 39 | 40 | # Org-mode 41 | .org-id-locations 42 | *_archive 43 | 44 | # flymake-mode 45 | *_flymake.* 46 | 47 | # eshell files 48 | /eshell/history 49 | /eshell/lastdir 50 | 51 | # elpa packages 52 | /elpa/ 53 | 54 | 55 | ### vim ### 56 | [._]*.s[a-w][a-z] 57 | [._]s[a-w][a-z] 58 | *.un~ 59 | Session.vim 60 | .netrwhist 61 | *~ 62 | 63 | 64 | ### OSX ### 65 | .DS_Store 66 | .AppleDouble 67 | .LSOverride 68 | 69 | # Icon must ends with two \r. 70 | Icon 71 | 72 | 73 | # Thumbnails 74 | ._* 75 | 76 | # Files that might appear on external disk 77 | .Spotlight-V100 78 | .Trashes 79 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.3 5 | - 1.2 6 | - release 7 | - tip 8 | 9 | install: 10 | - go get ./... 11 | 12 | script: 13 | - go test ./... -v 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Ryuichi Yamamoto 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | NNET - Neural Networks in Go [![Build Status](https://travis-ci.org/r9y9/nnet.svg?branch=master)](https://travis-ci.org/r9y9/nnet)
2 | ---------------------------------------------------------------
3 | 
4 | NNET is a small collection of neural network algorithms written in the pure Go language.
5 | 
6 | ## Packages
7 | 
8 | - **rbm** - Binary-Binary Restricted Boltzmann Machines (RBMs)
9 | - **gbrbm** - Gaussian-Binary RBMs
10 | - **mlp** - Multi-Layer Perceptron (Feed-Forward Neural Networks)
11 | - **mlp3** - Three-Layer Perceptron
12 | - **dbn** - Deep Belief Nets (in development)
13 | 
14 | ## Install
15 | 
16 |     go get github.com/r9y9/nnet
17 | 
18 | ## Examples
19 | 
20 | ### Binary-Binary Restricted Boltzmann Machines on MNIST
21 | 
22 |     cd examples/rbm
23 |     go run rbm_mnist.go -h # for help
24 |     go run rbm_mnist.go -epoch=5 -hidden_units=400 -learning_rate=0.1 -order=1 -output="rbm.json" -persistent -size=20
25 | 
26 | It took 32 minutes to train the RBM on my MacBook Air (as of 07/28/2014).
27 | 
28 | #### Weight visualization
29 | 
30 |     python visualize.py rbm.json
31 | 
32 | ![image](http://r9y9.github.io/images/RBM_mnist_Hidden_500_layers.png)
33 | 
34 | ### Multi-Layer Perceptron
35 | 
36 | #### Training
37 | 
38 |     cd examples/mlp3
39 |     go run mlp3_mnist.go -epoch=500000 -hidden_units=100 -learning_rate=0.1 -o="nn.json"
40 | 
41 | It took 10 minutes to train the MLP on my MacBook Air (as of 07/30/2014).
42 | 
43 | #### Classification
44 | 
45 |     go run mlp3_mnist.go -test -m=nn.json
46 |     ...
47 |     Acc. 0.971000 (9710/10000)
48 | 
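### Library usage

A minimal in-code sketch (adapted from `mlp3/mlp3_test.go`), learning XOR with a three-layer perceptron:

    package main

    import (
        "fmt"

        "github.com/r9y9/nnet/mlp3"
    )

    func main() {
        input := [][]float64{{0, 0}, {0, 1}, {1, 0}, {1, 1}}
        target := [][]float64{{0}, {1}, {1}, {0}}

        // 2 input units, 20 hidden units, 1 output unit
        net := mlp3.NewNeuralNetwork(2, 20, 1)
        option := mlp3.TrainingOption{
            LearningRate: 0.1,
            Epoches:      50000,
            Monitoring:   false,
        }
        if err := net.Train(input, target, option); err != nil {
            panic(err)
        }
        for _, x := range input {
            fmt.Println(x, net.Forward(x)) // outputs should approach 0, 1, 1, 0
        }
    }
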
49 | ## TODO
50 | 
51 | - Use a linear algebra library such as gonum/matrix or go.matrix
52 | - GPU-powered training
53 | - Refactor (write more idiomatic code, speed up, etc.)
54 | - Tests for all packages
55 | - More flexibility, like pylearn2
56 | 
57 | ## License
58 | 
59 | [MIT](./LICENSE)
60 | 
-------------------------------------------------------------------------------- /dataset/mnist/mnist.go: --------------------------------------------------------------------------------
1 | // Package mnist provides support for parsing the MNIST dataset.
2 | package mnist
3 | 
4 | import (
5 | "encoding/binary"
6 | "io"
7 | "math"
8 | )
9 | 
10 | const (
11 | PixelRange = 255
12 | NoiseConstant = 1.0e-21
13 | )
14 | 
15 | func ReadMNISTLabels(r io.Reader) (labels []byte) {
16 | header := [2]int32{}
17 | binary.Read(r, binary.BigEndian, &header)
18 | labels = make([]byte, header[1])
19 | r.Read(labels)
20 | return
21 | }
22 | 
23 | func ReadMNISTImages(r io.Reader) (images [][]byte, width, height int) {
24 | header := [4]int32{}
25 | binary.Read(r, binary.BigEndian, &header)
26 | images = make([][]byte, header[1])
27 | width, height = int(header[2]), int(header[3])
28 | for i := 0; i < len(images); i++ {
29 | images[i] = make([]byte, width*height)
30 | r.Read(images[i])
31 | }
32 | return
33 | }
34 | 
35 | func ImageString(buffer []byte, height, width int) (out string) {
36 | for i, y := 0, 0; y < height; y++ {
37 | for x := 0; x < width; x++ {
38 | if buffer[i] > 128 {
39 | out += "#"
40 | } else {
41 | out += " "
42 | }
43 | i++
44 | }
45 | out += "\n"
46 | }
47 | return
48 | }
49 | 
50 | func DownSample(X [][]float64, w, h, order int) [][]float64 {
51 | rows := len(X)
52 | result := make([][]float64, rows)
53 | for i := 0; i < rows; i++ {
54 | result[i] = make([]float64, len(X[i])/order/order)
55 | for j := 0; j < w/order; j++ {
56 | for k := 0; k < h/order; k++ {
57 | result[i][j*w/order+k] = X[i][j*w*order+k*order]
58 | }
59 | }
60 | }
61 | return result
62 | }
63 | 
64 | func NormalizePixel(M [][]float64) [][]float64 {
65 | rows := len(M)
66 | result := make([][]float64, rows)
67 | for i := 0; i < rows; i++ {
68 | result[i] = make([]float64, len(M[i]))
69 | for j := 0; j < len(M[i]); j++ {
70 | result[i][j] = normalizePixel(M[i][j])
71 | }
72 | }
73 | return result
74 | }
75 | 
76 | func normalizePixel(px float64) float64 {
77 | return px/PixelRange + NoiseConstant
78 | }
79 | 
80 | func NormalizePixelToStandardGaussian(X [][]float64) [][]float64 {
81 | rows := len(X)
82 | cols := len(X[0])
83 | result := make([][]float64, rows)
84 | for i := 0; i < rows; i++ {
85 | result[i] = make([]float64, len(X[i]))
86 | }
87 | 
88 | // for each dimension
89 | for i := 0; i < cols; i++ {
90 | mean, dev := 0.0, 0.0
91 | for j := 0; j < rows; j++ {
92 | mean += X[j][i]
93 | }
94 | mean /= float64(rows)
95 | for j := 0; j < rows; j++ {
96 | dev += (X[j][i] - mean) * (X[j][i] - mean)
97 | }
98 | dev = math.Sqrt(dev / float64(rows))
99 | for j := 0; j < rows; j++ {
100 | if dev == 0 {
101 | result[j][i] = 0
102 | } else {
103 | result[j][i] = (X[j][i] - mean) / dev
104 | }
105 | 
106 | if result[j][i] < -1 {
107 | result[j][i] = -1
108 | }
109 | if result[j][i] > 1 {
110 | result[j][i] = 1
111 | }
112 | 
113 | }
114 | }
115 | 
116 | return result
117 | }
118 | 
119 | func PrepareX(M [][]byte) [][]float64 {
120 | rows := len(M)
121 | result := make([][]float64, rows)
122 | for i := 0; i < rows; i++ {
123 | result[i] = make([]float64, len(M[i]))
124 | for j := 0; j < len(M[i]); j++ {
125 | result[i][j] = float64(M[i][j])
126 | }
127 | }
128 | return result
129 | }
130 | 
131 | func PrepareY(N []byte) [][]float64 {
132 | result := make([][]float64, len(N))
133 | for i := 0; i < len(result); i++ {
134 | tmp := make([]float64, 10)
135 | for j := 0; j < 10; j++ {
136 | tmp[j] = NoiseConstant // add noise
137 | }
138 | tmp[N[i]] = 0.99
139 | result[i] = tmp
140 | }
141 | return result
142 | }
143 | 
-------------------------------------------------------------------------------- /dbn/dbn.go: --------------------------------------------------------------------------------
1 | // Package dbn provides support for Deep Belief Nets.
2 | package dbn
3 | 
4 | import (
5 | "encoding/json"
6 | "fmt"
7 | "github.com/r9y9/nnet"
8 | "github.com/r9y9/nnet/rbm"
9 | "os"
10 | )
11 | 
12 | // DBN represents Deep Belief Networks.
13 | type DBN struct {
14 | RBMs []*rbm.RBM
15 | NumLayers int
16 | }
17 | 
18 | type PreTrainingOption struct {
19 | rbm.TrainingOption
20 | }
21 | 
22 | func New() *DBN {
23 | return &DBN{}
24 | }
25 | 
26 | // Load loads a DBN from a dump file and returns its instance.
27 | func Load(filename string) (*DBN, error) {
28 | file, err := os.Open(filename)
29 | if err != nil {
30 | return nil, err
31 | }
32 | defer file.Close()
33 | 
34 | decoder := json.NewDecoder(file)
35 | d := &DBN{}
36 | err = decoder.Decode(d)
37 | 
38 | if err != nil {
39 | return nil, err
40 | }
41 | 
42 | return d, nil
43 | }
44 | 
45 | // Dump writes DBN parameters to file in json format.
46 | func (d *DBN) Dump(filename string) error {
47 | return nnet.DumpAsJson(filename, d)
48 | }
49 | 
50 | func (d *DBN) AddLayer(numVisibleUnits, numHiddenUnits int) {
51 | if d.RBMs != nil {
52 | // The new layer's visible units must match the last layer's hidden units
53 | r := d.RBMs[len(d.RBMs)-1] // last layer
54 | numVisibleUnitsOfNewLayer := r.NumHiddenUnits
55 | 
56 | if numVisibleUnits != numVisibleUnitsOfNewLayer {
57 | panic("dbn: number of visible units doesn't match the hidden units of the last layer")
58 | }
59 | }
60 | 
61 | // Add new RBM layer
62 | newRbm := rbm.New(numVisibleUnits, numHiddenUnits)
63 | d.RBMs = append(d.RBMs, newRbm)
64 | d.NumLayers++
65 | }
66 | 
67 | // PreTraining performs layer-wise greedy unsupervised training of the RBMs.
68 | func (d *DBN) PreTraining(data [][]float64, option PreTrainingOption) {
69 | newData := make([][]float64, len(data))
70 | copy(newData, data)
71 | 
72 | // layer-wise greedy training
73 | for i := range d.RBMs {
74 | opt := rbm.TrainingOption{
75 | LearningRate: option.LearningRate,
76 | Epoches: option.Epoches,
77 | OrderOfGibbsSampling: option.OrderOfGibbsSampling,
78 | MiniBatchSize: option.MiniBatchSize,
79 | L2Regularization: option.L2Regularization,
80 | RegularizationRate: option.RegularizationRate,
81 | Monitoring: option.Monitoring,
82 | }
83 | 
84 | r := d.RBMs[i]
85 | fmt.Printf("Start training layer %d.\n", i+1)
86 | fmt.Println(r.NumVisibleUnits, r.NumHiddenUnits)
87 | 
88 | // Train!
89 | r.Train(newData, opt)
90 | 
91 | // Transfer activation to the next layer
92 | for n := 0; n < len(newData); n++ {
93 | newData[n] = r.Forward(newData[n])
94 | }
95 | }
96 | }
97 | 
98 | // FineTuning performs supervised training of deep neural networks,
99 | // which are composed of pre-trained DBNs.
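// It is not implemented yet; a typical implementation would initialize a
// feed-forward network (see the mlp package) with the pre-trained RBM weights
// and refine them with supervised backpropagation.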
100 | func FineTuning(input [][]float64, target [][]float64) {
101 | 
102 | }
103 | 
-------------------------------------------------------------------------------- /examples/data/t10k-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r9y9/nnet/ae78c1325801201faaabb5c7734e99ca83645401/examples/data/t10k-images-idx3-ubyte
-------------------------------------------------------------------------------- /examples/data/t10k-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r9y9/nnet/ae78c1325801201faaabb5c7734e99ca83645401/examples/data/t10k-labels-idx1-ubyte
-------------------------------------------------------------------------------- /examples/data/train-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r9y9/nnet/ae78c1325801201faaabb5c7734e99ca83645401/examples/data/train-images-idx3-ubyte
-------------------------------------------------------------------------------- /examples/data/train-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r9y9/nnet/ae78c1325801201faaabb5c7734e99ca83645401/examples/data/train-labels-idx1-ubyte
-------------------------------------------------------------------------------- /examples/mlp3/mlp3_mnist.go: --------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | "flag"
5 | "fmt"
6 | "github.com/r9y9/nnet"
7 | "github.com/r9y9/nnet/dataset/mnist"
8 | "github.com/r9y9/nnet/mlp3"
9 | "log"
10 | "os"
11 | "time"
12 | )
13 | 
14 | // Classification test using MNIST dataset.
15 | func Test(filename string) {
16 | net, err := mlp3.Load(filename)
17 | if err != nil {
18 | log.Fatal(err)
19 | }
20 | 
21 | testPath := "../data/t10k-images-idx3-ubyte"
22 | targetPath := "../data/t10k-labels-idx1-ubyte"
23 | 
24 | file, err := os.Open(testPath)
25 | if err != nil {
26 | log.Fatal(err)
27 | }
28 | images, w, h := mnist.ReadMNISTImages(file)
29 | fmt.Println(len(images), w, h, w*h)
30 | 
31 | lfile, lerr := os.Open(targetPath)
32 | if lerr != nil {
33 | log.Fatal(lerr)
34 | }
35 | labels := mnist.ReadMNISTLabels(lfile)
36 | 
37 | // Convert image to data matrix
38 | data := mnist.NormalizePixel(mnist.PrepareX(images))
39 | target := mnist.PrepareY(labels)
40 | 
41 | result := nnet.Test(net, data)
42 | 
43 | sum := 0.0
44 | for i := range result {
45 | if result[i] == nnet.Argmax(target[i]) {
46 | sum += 1.0
47 | }
48 | }
49 | fmt.Printf("Acc. %f (%d/%d)\n", sum/float64(len(result)),
50 | int(sum), len(result))
51 | }
52 | 
53 | func main() {
54 | test := flag.Bool("test", false, "run the classification test instead of training")
55 | modelFilename := flag.String("m", "nn.json", "model filename (*.json)")
56 | outFilename := flag.String("o", "nn.json", "output model filename (*.json)")
57 | learningRate := flag.Float64("learning_rate", 0.1, "Learning rate")
58 | epoches := flag.Int("epoch", 50000*10, "Epoches")
59 | numHiddenUnits := flag.Int("hidden_units", 100, "Number of hidden units")
60 | 
61 | flag.Parse()
62 | if *test {
63 | Test(*modelFilename)
64 | return
65 | }
66 | 
67 | trainingPath := "../data/train-images-idx3-ubyte"
68 | labelPath := "../data/train-labels-idx1-ubyte"
69 | file, err := os.Open(trainingPath)
70 | if err != nil {
71 | log.Fatal(err)
72 | }
73 | 
74 | images, w, h := mnist.ReadMNISTImages(file)
75 | fmt.Println(len(images), w, h, w*h)
76 | 
77 | lfile, lerr := os.Open(labelPath)
78 | if lerr != nil {
79 | log.Fatal(lerr)
80 | }
81 | labels := mnist.ReadMNISTLabels(lfile)
82 | 
83 | // Convert image to data matrix
84 | data := mnist.NormalizePixel(mnist.PrepareX(images))
85 | target := mnist.PrepareY(labels)
86 | 
87 | // Setup Neural Network
88 | net := mlp3.NewNeuralNetwork(w*h, *numHiddenUnits, 10)
89 | option := mlp3.TrainingOption{
90 | LearningRate: *learningRate,
91 | Epoches: *epoches, // the number of iterations in SGD
92 | Monitoring: true,
93 | }
94 | 
95 | // Perform training
96 | start := time.Now()
97 | nerr := net.Train(data, target, option)
98 | if nerr != nil {
99 | log.Fatal(nerr)
100 | }
101 | elapsed := time.Now().Sub(start)
102 | fmt.Println(elapsed)
103 | 
104 | oerr := net.Dump(*outFilename)
105 | if oerr != nil {
106 | log.Fatal(oerr)
107 | }
108 | fmt.Println("Parameters are dumped to", *outFilename)
109 | fmt.Println("Training finished!")
110 | }
111 | 
-------------------------------------------------------------------------------- /examples/rbm/rbm_mnist.go: --------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | "flag"
5 | "fmt"
6 | "github.com/r9y9/nnet/dataset/mnist"
7 | "github.com/r9y9/nnet/rbm"
8 | "log"
9 | "os"
10 | "time"
11 | )
12 | 
13 | func main() {
14 | outFilename := flag.String("output", "nn.json", "Output filename (*.json)")
15 | modelFilename := flag.String("model", "", "Model filename (*.json)")
16 | learningRate := flag.Float64("learning_rate", 0.1, "Learning rate")
17 | epoches := flag.Int("epoch", 5, "Epoches")
18 | usePersistent := flag.Bool("persistent", false, "Persistent contrastive learning")
19 | orderOfGibbsSampling := flag.Int("order", 1, "Order of Gibbs sampling")
20 | orderOfDownSampling := flag.Int("down", 1, "Order of down sampling")
21 | miniBatchSize := flag.Int("size", 20, "Mini-batch size")
22 | l2 := flag.Bool("l2", false, "L2 regularization")
23 | numHiddenUnits := flag.Int("hidden_units", 100, "Number of hidden units")
24 | flag.Parse()
25 | 
26 | trainingPath := "../data/train-images-idx3-ubyte"
27 | file, err := os.Open(trainingPath)
28 | if err != nil {
29 | log.Fatal(err)
30 | }
31 | images, w, h := mnist.ReadMNISTImages(file)
32 | 
33 | // Convert image to data matrix
34 | data := mnist.PrepareX(images)
35 | data = mnist.NormalizePixel(mnist.DownSample(data, w, h, *orderOfDownSampling))
36 | 
37 | // w and h with down sampled data
38 | w, h = w/(*orderOfDownSampling), h/(*orderOfDownSampling)
39 | 
40 | // Create RBM
41 | var r *rbm.RBM
42 | if *modelFilename != "" {
43 | r, err = rbm.Load(*modelFilename)
44 | if err != nil {
45 | log.Fatal(err)
46 | 
47 | }
48 | fmt.Println("Loaded parameters from", *modelFilename)
49 | } else {
50 | numVisibleUnits := w * h
51 | r = rbm.New(numVisibleUnits, *numHiddenUnits)
52 | }
53 | 
54 | // Training
55 | option := rbm.TrainingOption{
56 | LearningRate: *learningRate,
57 | Epoches: *epoches,
58 | OrderOfGibbsSampling: *orderOfGibbsSampling,
59 | UsePersistent: *usePersistent,
60 | MiniBatchSize: *miniBatchSize,
61 | L2Regularization: *l2,
62 | RegularizationRate: 0.0001,
63 | Monitoring: true,
64 | }
65 | 
66 | fmt.Println("Start training")
67 | start := time.Now()
68 | terr := r.Train(data, option)
69 | if terr != nil {
70 | log.Fatal(terr)
71 | }
72 | fmt.Println("Elapsed:", time.Now().Sub(start))
73 | 
74 | oerr := r.Dump(*outFilename)
75 | if oerr != nil {
76 | log.Fatal(oerr)
77 | }
78 | fmt.Println("Parameters are dumped to", *outFilename)
79 | fmt.Println("Training finished.")
80 | }
81 | 
-------------------------------------------------------------------------------- /examples/rbm/visualize.py: --------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import json
3 | 
4 | # A weight visualization script for Restricted Boltzmann Machines
5 | # trained on the MNIST dataset.
6 | 
7 | import numpy as np
8 | import sys
9 | from pylab import *
10 | 
11 | argv = sys.argv
12 | if len(argv) != 2:
13 | print("Incorrect arguments.")
14 | quit()
15 | 
16 | filename = argv[1]
17 | f = open(filename)
18 | data = json.load(f)
19 | f.close()
20 | 
21 | W = np.array(data['W'])
22 | M = int(data['NumHiddenUnits'])
23 | w,h = int(np.sqrt(M)), int(np.sqrt(M))
24 | M = w*h
25 | for i in range(M):
26 | subplot(w, h, i+1)
27 | imshow(W[i].reshape(28, 28), cmap=cm.Greys_r)
28 | xticks(())
29 | yticks(())
30 | 
31 | show()
32 | 
-------------------------------------------------------------------------------- /gbrbm/gbrbm.go: --------------------------------------------------------------------------------
1 | // Package gbrbm provides support for Gaussian-Binary Restricted Boltzmann
2 | // Machines.
3 | package gbrbm
4 | 
5 | import (
6 | "encoding/json"
7 | "github.com/r9y9/nnet" // sigmoid, matrix
8 | "math"
9 | "math/rand"
10 | "os"
11 | "time"
12 | )
13 | 
14 | // Gaussian-Binary Restricted Boltzmann Machines (GBRBM)
15 | type GBRBM struct {
16 | W [][]float64 // Weight
17 | B []float64 // Bias of visible layer
18 | C []float64 // Bias of hidden layer
19 | NumHiddenUnits int
20 | NumVisibleUnits int
21 | PersistentVisibleUnits [][]float64 // used in Persistent contrastive learning
22 | GradW [][]float64
23 | GradB []float64
24 | GradC []float64
25 | Option TrainingOption
26 | }
27 | 
28 | type TrainingOption struct {
29 | LearningRate float64
30 | OrderOfGibbsSampling int // 1 is enough for many cases.
31 | UsePersistent bool
32 | UseMean bool // hack option
33 | Epoches int
34 | MiniBatchSize int
35 | L2Regularization bool
36 | RegularizationRate float64
37 | Monitoring bool
38 | }
39 | 
40 | // New creates a new GBRBM instance.
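// Weights are initialized to small Gaussian noise (standard deviation 0.01)
// and both bias vectors to zero; see InitParam.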
41 | func New(numVisibleUnits, numHiddenUnits int) *GBRBM {
42 | rbm := new(GBRBM)
43 | rand.Seed(time.Now().UnixNano())
44 | rbm.NumVisibleUnits = numVisibleUnits
45 | rbm.NumHiddenUnits = numHiddenUnits
46 | rbm.W = nnet.MakeMatrix(numHiddenUnits, numVisibleUnits)
47 | rbm.B = make([]float64, numVisibleUnits)
48 | rbm.C = make([]float64, numHiddenUnits)
49 | rbm.GradW = nnet.MakeMatrix(numHiddenUnits, numVisibleUnits)
50 | rbm.GradB = make([]float64, numVisibleUnits)
51 | rbm.GradC = make([]float64, numHiddenUnits)
52 | rbm.InitParam()
53 | return rbm
54 | }
55 | 
56 | // InitParam performs a heuristic parameter initialization.
57 | func (rbm *GBRBM) InitParam() {
58 | // Init W
59 | for i := 0; i < rbm.NumHiddenUnits; i++ {
60 | for j := 0; j < rbm.NumVisibleUnits; j++ {
61 | rbm.W[i][j] = 0.01 * rand.NormFloat64()
62 | }
63 | }
64 | 
65 | // Init visible bias
66 | for j := 0; j < rbm.NumVisibleUnits; j++ {
67 | rbm.B[j] = 0.0
68 | }
69 | 
70 | // Init hidden bias
71 | for i := 0; i < rbm.NumHiddenUnits; i++ {
72 | rbm.C[i] = 0.0
73 | }
74 | }
75 | 
76 | // Load loads a GBRBM from a dump file and returns its instance.
77 | func Load(filename string) (*GBRBM, error) {
78 | file, err := os.Open(filename)
79 | if err != nil {
80 | return nil, err
81 | }
82 | defer file.Close()
83 | 
84 | decoder := json.NewDecoder(file)
85 | rbm := &GBRBM{}
86 | err = decoder.Decode(rbm)
87 | 
88 | if err != nil {
89 | return nil, err
90 | }
91 | 
92 | return rbm, nil
93 | }
94 | 
95 | // Dump writes GBRBM parameters to file in json format.
96 | func (rbm *GBRBM) Dump(filename string) error {
97 | rbm.PersistentVisibleUnits = nil
98 | return nnet.DumpAsJson(filename, rbm)
99 | }
100 | 
101 | // Forward performs activity propagation from visible to hidden layer.
102 | func (rbm *GBRBM) Forward(v []float64) []float64 {
103 | hidden := make([]float64, rbm.NumHiddenUnits)
104 | for i := 0; i < rbm.NumHiddenUnits; i++ {
105 | hidden[i] = rbm.P_H_Given_V(i, v)
106 | }
107 | return hidden
108 | }
109 | 
110 | // P_H_Given_V returns p(h=1|v), the conditional probability of activation
111 | // of a hidden unit given a set of visible units.
112 | func (rbm *GBRBM) P_H_Given_V(hiddenIndex int, v []float64) float64 {
113 | sum := 0.0
114 | for j := 0; j < rbm.NumVisibleUnits; j++ {
115 | sum += rbm.W[hiddenIndex][j] * v[j]
116 | }
117 | return nnet.Sigmoid(sum + rbm.C[hiddenIndex])
118 | }
119 | 
120 | // Sample_H_Given_V returns a sample drawn from p(h|v), where h is a binary unit.
121 | func (rbm *GBRBM) Sample_H_Given_V(hiddenIndex int, v []float64) float64 {
122 | p := rbm.P_H_Given_V(hiddenIndex, v)
123 | if p > rand.Float64() {
124 | return 1.0
125 | } else {
126 | return 0.0
127 | }
128 | }
129 | 
130 | // Mean_V_Given_H returns the mean of a Gaussian visible unit v ~ N(mean, sigma).
131 | func (rbm *GBRBM) Mean_V_Given_H(visibleIndex int, h []float64) float64 {
132 | sum := 0.0
133 | for i := 0; i < rbm.NumHiddenUnits; i++ {
134 | sum += rbm.W[i][visibleIndex] * h[i]
135 | }
136 | return sum + rbm.B[visibleIndex]
137 | }
138 | 
139 | // Sample_V_Given_H returns a sample drawn from the Gaussian distribution p(v|h).
140 | func (rbm *GBRBM) Sample_V_Given_H(visibleIndex int, h []float64) float64 {
141 | return rbm.Mean_V_Given_H(visibleIndex, h) + rand.NormFloat64()*1.0
142 | }
143 | 
144 | // Reconstruct performs reconstruction based on the k-step Gibbs sampling algorithm,
145 | // where k is the number of iterations.
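// Each step samples the binary hidden units from p(h|v) and then redraws the
// visible units from the Gaussian p(v|h); when useMean is true, the Gaussian
// mean is used in place of a sample, which can stabilize training.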
146 | func (rbm *GBRBM) Reconstruct(v []float64, numSteps int, useMean bool) []float64 {
147 | // Initial value is set to input
148 | reconstructedVisible := make([]float64, len(v))
149 | copy(reconstructedVisible, v)
150 | 
151 | // perform Gibbs-sampling
152 | for step := 0; step < numSteps; step++ {
153 | // 1. sample hidden units
154 | hiddenState := make([]float64, rbm.NumHiddenUnits)
155 | for i := 0; i < rbm.NumHiddenUnits; i++ {
156 | hiddenState[i] =
157 | rbm.Sample_H_Given_V(i, reconstructedVisible)
158 | }
159 | // 2. sample visible units
160 | // try to use the mean value instead if training is unstable
161 | for j := 0; j < rbm.NumVisibleUnits; j++ {
162 | if useMean {
163 | reconstructedVisible[j] =
164 | rbm.Mean_V_Given_H(j, hiddenState)
165 | } else {
166 | reconstructedVisible[j] =
167 | rbm.Sample_V_Given_H(j, hiddenState)
168 | }
169 | }
170 | }
171 | 
172 | return reconstructedVisible
173 | }
174 | 
175 | // ReconstructionError returns the reconstruction error.
176 | // The mean of the Gaussian is used when computing it.
177 | func (rbm *GBRBM) ReconstructionError(data [][]float64, numSteps int) float64 {
178 | err := 0.0
179 | for _, v := range data {
180 | reconstructed := rbm.Reconstruct(v, numSteps, true)
181 | err += nnet.SquareErrBetweenTwoVector(v, reconstructed)
182 | }
183 | return 0.5 * err / float64(len(data))
184 | }
185 | 
186 | // FreeEnergy returns F(v), the free energy of GBRBM given a visible vector v.
187 | // It is assumed that the standard deviation equals 1.
188 | func (rbm *GBRBM) FreeEnergy(v []float64) float64 {
189 | energy := 0.0
190 | 
191 | for j := 0; j < rbm.NumVisibleUnits; j++ {
192 | energy -= 0.5 * (rbm.B[j] - v[j]) * (rbm.B[j] - v[j])
193 | }
194 | 
195 | for i := 0; i < rbm.NumHiddenUnits; i++ {
196 | sum := rbm.C[i]
197 | for j := 0; j < rbm.NumVisibleUnits; j++ {
198 | sum += rbm.W[i][j] * v[j]
199 | }
200 | energy -= math.Log(1 + math.Exp(sum))
201 | }
202 | 
203 | return energy
204 | }
205 | 
206 | func (rbm *GBRBM) UnSupervisedObjective(data [][]float64) float64 {
207 | size := 3000
208 | if size > len(data) {
209 | size = len(data)
210 | }
211 | subset := nnet.RandomSubset(data, size)
212 | return rbm.ReconstructionError(subset, rbm.Option.OrderOfGibbsSampling)
213 | }
214 | 
215 | func (rbm *GBRBM) P_H_Given_V_Batch(v []float64) []float64 {
216 | h := make([]float64, rbm.NumHiddenUnits)
217 | for i := range h {
218 | h[i] = rbm.P_H_Given_V(i, v)
219 | }
220 | return h
221 | }
222 | 
223 | // Gradient returns gradients of GBRBM parameters for a given (mini-batch) dataset.
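// Per sample, the contrastive-divergence estimates accumulated below are
//
//	gradW[i][j] += p(h_i=1|v)*v[j] - p(h_i=1|v')*v'[j]
//	gradB[j]    += v[j] - v'[j]
//	gradC[i]    += p(h_i=1|v) - p(h_i=1|v')
//
// where v' is the reconstruction of v after k steps of Gibbs sampling.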
224 | func (rbm *GBRBM) Gradient(data [][]float64,
225 | miniBatchIndex int) ([][]float64, []float64, []float64) {
226 | gradW := nnet.MakeMatrix(rbm.NumHiddenUnits, rbm.NumVisibleUnits)
227 | gradB := make([]float64, rbm.NumVisibleUnits)
228 | gradC := make([]float64, rbm.NumHiddenUnits)
229 | 
230 | for i, v := range data {
231 | // Set start state of Gibbs-sampling
232 | var gibbsStart []float64
233 | persistentIndex := i + miniBatchIndex*rbm.Option.MiniBatchSize
234 | if rbm.Option.UsePersistent {
235 | gibbsStart = rbm.PersistentVisibleUnits[persistentIndex]
236 | } else {
237 | gibbsStart = v
238 | }
239 | 
240 | // Perform reconstruction using Gibbs-sampling
241 | reconstructedVisible := rbm.Reconstruct(gibbsStart,
242 | rbm.Option.OrderOfGibbsSampling, rbm.Option.UseMean)
243 | 
244 | // keep the reconstructed visible units
245 | if rbm.Option.UsePersistent {
246 | rbm.PersistentVisibleUnits[persistentIndex] =
247 | reconstructedVisible
248 | }
249 | 
250 | // pre-computation that is used in gradient computation
251 | p_h_given_v1 := rbm.P_H_Given_V_Batch(v)
252 | p_h_given_v2 := rbm.P_H_Given_V_Batch(reconstructedVisible)
253 | 
254 | // Compute gradient of W
255 | for i := 0; i < rbm.NumHiddenUnits; i++ {
256 | for j := 0; j < rbm.NumVisibleUnits; j++ {
257 | gradW[i][j] += p_h_given_v1[i]*v[j] -
258 | p_h_given_v2[i]*reconstructedVisible[j]
259 | }
260 | }
261 | 
262 | // Compute gradient of B
263 | for j := 0; j < rbm.NumVisibleUnits; j++ {
264 | gradB[j] += v[j] - reconstructedVisible[j]
265 | }
266 | 
267 | // Compute gradient of C
268 | for i := 0; i < rbm.NumHiddenUnits; i++ {
269 | gradC[i] += p_h_given_v1[i] - p_h_given_v2[i]
270 | }
271 | }
272 | 
273 | return rbm.normalizeGradBySizeOfBatch(gradW, gradB, gradC, len(data))
274 | }
275 | 
276 | func (rbm *GBRBM) normalizeGradBySizeOfBatch(gradW [][]float64,
277 | gradB, gradC []float64, size int) ([][]float64, []float64, []float64) {
278 | for i := 0; i < rbm.NumHiddenUnits; i++ {
279 | for j := 0; j < rbm.NumVisibleUnits; j++ {
280 | gradW[i][j] /= float64(size)
281 | }
282 | }
283 | for j := 0; j < rbm.NumVisibleUnits; j++ {
284 | gradB[j] /= float64(size)
285 | }
286 | for i := 0; i < rbm.NumHiddenUnits; i++ {
287 | gradC[i] /= float64(size)
288 | }
289 | return gradW, gradB, gradC
290 | }
291 | 
292 | func (rbm *GBRBM) UnSupervisedMiniBatchUpdate(batch [][]float64,
293 | epoch, miniBatchIndex int) {
294 | gradW, gradB, gradC := rbm.Gradient(batch, miniBatchIndex)
295 | 
296 | // TODO: make the momentum schedule configurable
297 | momentum := 0.5
298 | if epoch > 5 {
299 | momentum = 0.7
300 | }
301 | 
302 | // Update W
303 | for i := 0; i < rbm.NumHiddenUnits; i++ {
304 | for j := 0; j < rbm.NumVisibleUnits; j++ {
305 | grad := momentum*rbm.GradW[i][j] + rbm.Option.LearningRate*gradW[i][j]
306 | rbm.W[i][j] += grad
307 | if rbm.Option.L2Regularization {
308 | rbm.W[i][j] *= (1.0 - rbm.Option.RegularizationRate)
309 | }
310 | rbm.GradW[i][j] = grad
311 | }
312 | }
313 | 
314 | // Update B
315 | for j := 0; j < rbm.NumVisibleUnits; j++ {
316 | grad := momentum*rbm.GradB[j] + rbm.Option.LearningRate*gradB[j]
317 | rbm.B[j] += grad
318 | rbm.GradB[j] = grad
319 | }
320 | 
321 | // Update C
322 | for i := 0; i < rbm.NumHiddenUnits; i++ {
323 | grad := momentum*rbm.GradC[i] + rbm.Option.LearningRate*gradC[i]
324 | rbm.C[i] += grad
325 | rbm.GradC[i] = grad
326 | }
327 | }
328 | 
329 | // Train performs the contrastive divergence learning algorithm to train the GBRBM.
330 | // The algorithm is based on (mini-batch) stochastic gradient ascent.
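// When UsePersistent is set, the Gibbs chains are carried over between
// parameter updates (persistent contrastive divergence); otherwise each
// chain restarts from the training data (standard CD-k).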
331 | func (rbm *GBRBM) Train(data [][]float64, option TrainingOption) error {
332 | rbm.Option = option
333 | opt := nnet.BaseTrainingOption{
334 | Epoches: rbm.Option.Epoches,
335 | MiniBatchSize: rbm.Option.MiniBatchSize,
336 | Monitoring: rbm.Option.Monitoring,
337 | }
338 | 
339 | // Persistent contrastive learning
340 | if rbm.Option.UsePersistent {
341 | rbm.PersistentVisibleUnits = nnet.MakeMatrix(len(data), len(data[0]))
342 | copy(rbm.PersistentVisibleUnits, data)
343 | }
344 | 
345 | s := nnet.NewTrainer(opt)
346 | return s.UnSupervisedMiniBatchTrain(rbm, data)
347 | }
348 | 
-------------------------------------------------------------------------------- /mlp/hidden_layer.go: --------------------------------------------------------------------------------
1 | package mlp
2 | 
3 | import (
4 | "github.com/r9y9/nnet"
5 | "math/rand"
6 | )
7 | 
8 | type HiddenLayer struct {
9 | W [][]float64
10 | B []float64
11 | NumInputUnits int
12 | NumHiddenUnits int
13 | }
14 | 
15 | func NewHiddenLayer(numInputUnits, numHiddenUnits int) *HiddenLayer {
16 | h := new(HiddenLayer)
17 | h.W = nnet.MakeMatrix(numInputUnits, numHiddenUnits)
18 | h.NumInputUnits = numInputUnits
19 | h.NumHiddenUnits = numHiddenUnits
20 | h.B = make([]float64, numHiddenUnits)
21 | h.Init()
22 | return h
23 | }
24 | 
25 | // Init performs a heuristic parameter initialization.
26 | func (h *HiddenLayer) Init() {
27 | for i := range h.W {
28 | for j := range h.W[i] {
29 | h.W[i][j] = rand.Float64() - 0.5
30 | }
31 | }
32 | 
33 | for j := range h.B {
34 | h.B[j] = 1.0
35 | }
36 | }
37 | 
38 | // Forward prop
39 | func (h *HiddenLayer) Forward(input []float64) []float64 {
40 | return nnet.Forward(input, h.W, h.B)
41 | }
42 | 
43 | func (h *HiddenLayer) ForwardBatch(input [][]float64) [][]float64 {
44 | predicted := make([][]float64, len(input))
45 | for i := range input {
46 | predicted[i] = nnet.Forward(input[i], h.W, h.B)
47 | }
48 | return predicted
49 | }
50 | 
51 | func (h *HiddenLayer) AccumulateDelta(deltas []float64) []float64 {
52 | acc := make([]float64, h.NumInputUnits)
53 | for i := range acc {
54 | sum := 0.0
55 | for j := 0; j < h.NumHiddenUnits; j++ {
56 | sum += deltas[j] * h.W[i][j]
57 | }
58 | acc[i] = sum
59 | }
60 | return acc
61 | }
62 | 
63 | func (h *HiddenLayer) AccumulateDeltaBatch(deltas [][]float64) [][]float64 {
64 | acc := make([][]float64, len(deltas))
65 | for i := range acc {
66 | acc[i] = h.AccumulateDelta(deltas[i])
67 | }
68 | return acc
69 | }
70 | 
71 | func (h *HiddenLayer) BackwardWithTarget(predicted, target []float64) []float64 {
72 | delta := make([]float64, h.NumHiddenUnits)
73 | for i := 0; i < h.NumHiddenUnits; i++ {
74 | delta[i] = (predicted[i] - target[i]) * nnet.DSigmoid(predicted[i])
75 | }
76 | return delta
77 | }
78 | 
79 | func (h *HiddenLayer) Backward(predicted, accumulateDelta []float64) []float64 {
80 | delta := make([]float64, h.NumHiddenUnits)
81 | for i := 0; i < h.NumHiddenUnits; i++ {
82 | delta[i] = accumulateDelta[i] * nnet.DSigmoid(predicted[i])
83 | }
84 | return delta
85 | }
86 | 
87 | func (h *HiddenLayer) BackwardWithTargetBatch(predicted, target [][]float64) ([][]float64, [][]float64) {
88 | deltas := make([][]float64, len(predicted))
89 | for i := range predicted {
90 | deltas[i] = h.BackwardWithTarget(predicted[i], target[i])
91 | }
92 | return deltas, h.AccumulateDeltaBatch(deltas)
93 | }
94 | 
95 | func (h *HiddenLayer) BackwardBatch(predicted, accumulateDelta [][]float64) ([][]float64, [][]float64) {
96 | deltas := make([][]float64, len(predicted))
97 | 
98 | for i := range predicted {
99 | deltas[i] = h.Backward(predicted[i], accumulateDelta[i])
100 | }
101 | return deltas, h.AccumulateDeltaBatch(deltas)
102 | }
103 | 
104 | func (h *HiddenLayer) Gradient(input, deltas [][]float64) ([][]float64, []float64) {
105 | gradW := nnet.MakeMatrix(len(h.W), len(h.W[0]))
106 | gradB := make([]float64, len(h.B))
107 | 
108 | // Gradient
109 | for n := range input {
110 | for i := 0; i < h.NumHiddenUnits; i++ {
111 | for j := 0; j < h.NumInputUnits; j++ {
112 | gradW[j][i] -= deltas[n][i] * input[n][j]
113 | }
114 | gradB[i] -= deltas[n][i]
115 | }
116 | }
117 | 
118 | return gradW, gradB
119 | }
120 | 
121 | func (h *HiddenLayer) Turn(batch, deltas [][]float64, option TrainingOption) {
122 | gradW, gradB := h.Gradient(batch, deltas)
123 | 
124 | // mini-batch SGD
125 | for i := 0; i < h.NumHiddenUnits; i++ {
126 | for j := 0; j < h.NumInputUnits; j++ {
127 | h.W[j][i] += option.LearningRate * gradW[j][i] / float64(len(batch))
128 | if option.L2Regularization {
129 | h.W[j][i] *= (1.0 - option.RegularizationRate)
130 | }
131 | }
132 | h.B[i] += option.LearningRate * gradB[i] / float64(len(batch))
133 | }
134 | }
135 | 
-------------------------------------------------------------------------------- /mlp/mlp.go: --------------------------------------------------------------------------------
1 | package mlp
2 | 
3 | import (
4 | "encoding/json"
5 | "github.com/r9y9/nnet"
6 | "os"
7 | )
8 | 
9 | // MLP represents a multi-layer perceptron (feed-forward neural network).
10 | type MLP struct {
11 | HiddenLayers []*HiddenLayer
12 | Option TrainingOption
13 | NumLayers int // proxy for len(HiddenLayers)
14 | }
15 | 
16 | type TrainingOption struct {
17 | LearningRate float64
18 | Epoches int
19 | MiniBatchSize int
20 | L2Regularization bool
21 | RegularizationRate float64
22 | Monitoring bool
23 | }
24 | 
25 | // NewMLP creates a new MLP instance.
26 | func NewMLP() *MLP {
27 | d := new(MLP)
28 | return d
29 | }
30 | 
31 | // AddLayer adds a new hidden layer.
32 | func (d *MLP) AddLayer(numInputUnits, numHiddenUnits int) {
33 | layer := NewHiddenLayer(numInputUnits, numHiddenUnits)
34 | d.HiddenLayers = append(d.HiddenLayers, layer)
35 | d.NumLayers++
36 | }
37 | 
38 | // Load loads an MLP from a dump file and returns its instance.
39 | func Load(filename string) (*MLP, error) {
40 | file, err := os.Open(filename)
41 | if err != nil {
42 | return nil, err
43 | }
44 | defer file.Close()
45 | 
46 | decoder := json.NewDecoder(file)
47 | d := &MLP{}
48 | err = decoder.Decode(d)
49 | 
50 | if err != nil {
51 | return nil, err
52 | }
53 | 
54 | return d, nil
55 | }
56 | 
57 | func (d *MLP) Dump(filename string) error {
58 | return nnet.DumpAsJson(filename, d)
59 | }
60 | 
61 | func (d *MLP) Forward(input []float64) []float64 {
62 | // Start with first layer
63 | predicted := d.HiddenLayers[0].Forward(input)
64 | 
65 | // Transfer to the next layer
66 | for j := 1; j < len(d.HiddenLayers); j++ {
67 | predicted = d.HiddenLayers[j].Forward(predicted)
68 | }
69 | 
70 | return predicted
71 | }
72 | 
73 | // SupervisedObjective returns the objective function to optimize,
74 | // given input data and its supervised targets.
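// It is currently the mean squared error over the dataset,
//
//	E = 1/(2N) * sum_n ||Forward(input[n]) - target[n]||^2,
//
// as computed by MeanSquareErr below.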
75 | func (d *MLP) SupervisedObjective(input, target [][]float64) float64 {
76 | return d.MeanSquareErr(input, target)
77 | }
78 | 
79 | func (d *MLP) MeanSquareErr(input, target [][]float64) float64 {
80 | sum := 0.0
81 | for i := 0; i < len(target); i++ {
82 | sum += nnet.SquareErrBetweenTwoVector(d.Forward(input[i]), target[i])
83 | }
84 | return 0.5 * sum / float64(len(target))
85 | }
86 | 
87 | // SupervisedMiniBatchUpdate performs one backpropagation procedure.
88 | func (d *MLP) SupervisedMiniBatchUpdate(input [][]float64, target [][]float64) {
89 | predicted := make([][][]float64, len(d.HiddenLayers))
90 | lastIndex := len(d.HiddenLayers) - 1
91 | 
92 | // 1. Forward
93 | firstLayer := d.HiddenLayers[0]
94 | predicted[0] = firstLayer.ForwardBatch(input)
95 | for i := 1; i < len(d.HiddenLayers); i++ {
96 | predicted[i] = d.HiddenLayers[i].ForwardBatch(predicted[i-1])
97 | }
98 | 
99 | lastLayer := d.HiddenLayers[lastIndex]
100 | lastPredicted := predicted[lastIndex]
101 | 
102 | // 2. Backward
103 | deltas := make([][][]float64, len(d.HiddenLayers))
104 | sumDelta := make([][]float64, lastLayer.NumHiddenUnits)
105 | deltas[lastIndex], sumDelta = lastLayer.BackwardWithTargetBatch(lastPredicted, target)
106 | for i := lastIndex - 1; i >= 0; i-- {
107 | deltas[i], sumDelta = d.HiddenLayers[i].BackwardBatch(predicted[i], sumDelta)
108 | }
109 | 
110 | // 3. Feedback (update weight)
111 | for i := len(d.HiddenLayers) - 1; i >= 1; i-- {
112 | d.HiddenLayers[i].Turn(predicted[i-1], deltas[i], d.Option)
113 | }
114 | firstLayer.Turn(input, deltas[0], d.Option)
115 | }
116 | 
117 | // Train performs mini-batch SGD-based backpropagation to optimize the network.
118 | func (d *MLP) Train(input [][]float64, target [][]float64, option TrainingOption) error {
119 | d.Option = option
120 | opt := nnet.BaseTrainingOption{
121 | Epoches: d.Option.Epoches,
122 | MiniBatchSize: d.Option.MiniBatchSize,
123 | Monitoring: d.Option.Monitoring,
124 | }
125 | s := nnet.NewTrainer(opt)
126 | return s.SupervisedMiniBatchTrain(d, input, target)
127 | }
128 | 
-------------------------------------------------------------------------------- /mlp/mlp_test.go: --------------------------------------------------------------------------------
1 | package mlp
2 | 
3 | import (
4 | "math"
5 | "testing"
6 | )
7 | 
8 | // XOR
9 | func TestMLP(t *testing.T) {
10 | input := [][]float64{{0, 0}, {0, 1}, {1, 0}, {1, 1}}
11 | target := [][]float64{{0}, {1}, {1}, {0}}
12 | 
13 | d := NewMLP()
14 | d.AddLayer(2, 10)
15 | d.AddLayer(10, 10)
16 | d.AddLayer(10, 1)
17 | option := TrainingOption{
18 | LearningRate: 0.1,
19 | Epoches: 30000,
20 | MiniBatchSize: 1,
21 | L2Regularization: true,
22 | RegularizationRate: 1.0e-7,
23 | Monitoring: false,
24 | }
25 | 
26 | err := d.Train(input, target, option)
27 | if err != nil {
28 | t.Errorf("Train returns error, want no error.")
29 | }
30 | 
31 | // Test
32 | for i, val := range input {
33 | predicted := d.Forward(val)
34 | squaredErr := math.Abs(target[i][0] - predicted[0])
35 | if squaredErr > 0.1 {
36 | t.Errorf("Prediction Error %f, want less than 0.1.", squaredErr)
37 | }
38 | }
39 | }
40 | 
41 | func BenchmarkMLP(b *testing.B) {
42 | input := [][]float64{{0, 0}, {0, 1}, {1, 0}, {1, 1}}
43 | target := [][]float64{{0}, {1}, {1}, {0}}
44 | 
45 | d := NewMLP()
46 | d.AddLayer(2, 10)
47 | d.AddLayer(10, 10)
48 | d.AddLayer(10, 1)
49 | option := TrainingOption{
50 | LearningRate: 0.1,
51 | Epoches: 30000,
52 | MiniBatchSize: 1,
53 | L2Regularization: false,
54 | RegularizationRate: 0.00001,
55 | Monitoring: false,
56 | }
57 | 
58 | for i := 0; i < b.N; i++ {
59 | d.Train(input, target, option)
60 | }
61 | }
62 | 
-------------------------------------------------------------------------------- /mlp3/mlp3.go: --------------------------------------------------------------------------------
1 | // Package mlp3 provides support for three-layer perceptron.
2 | package mlp3
3 | 
4 | import (
5 | "encoding/json"
6 | "errors"
7 | "fmt"
8 | "github.com/r9y9/nnet"
9 | "math/rand"
10 | "os"
11 | "time"
12 | )
13 | 
14 | const (
15 | Bias = 1.0
16 | )
17 | 
18 | // NeuralNetwork represents a feed-forward neural network.
19 | type NeuralNetwork struct {
20 | OutputLayer []float64
21 | HiddenLayer []float64
22 | InputLayer []float64
23 | OutputWeight [][]float64
24 | HiddenWeight [][]float64
25 | Option TrainingOption
26 | }
27 | 
28 | type TrainingOption struct {
29 | LearningRate float64
30 | Epoches int
31 | MiniBatchSize int
32 | Monitoring bool
33 | }
34 | 
35 | // Load loads a neural network from a dump file and returns its instance.
36 | func Load(filename string) (*NeuralNetwork, error) {
37 | file, err := os.Open(filename)
38 | if err != nil {
39 | return nil, err
40 | }
41 | defer file.Close()
42 | 
43 | decoder := json.NewDecoder(file)
44 | net := &NeuralNetwork{}
45 | err = decoder.Decode(net)
46 | 
47 | if err != nil {
48 | return nil, err
49 | }
50 | 
51 | return net, nil
52 | }
53 | 
54 | // NewNeuralNetwork returns a new network instance given the number of
55 | // input units, the number of hidden units, and the number of output units
56 | // of the network.
57 | func NewNeuralNetwork(numInputUnits,
58 | numHiddenUnits, numOutputUnits int) *NeuralNetwork {
59 | net := new(NeuralNetwork)
60 | rand.Seed(time.Now().UnixNano())
61 | 
62 | // Layers
63 | net.InputLayer = make([]float64, numInputUnits+1) // plus bias
64 | net.HiddenLayer = make([]float64, numHiddenUnits)
65 | net.OutputLayer = make([]float64, numOutputUnits)
66 | 
67 | // Weights
68 | net.OutputWeight = nnet.MakeMatrix(numHiddenUnits, numOutputUnits)
69 | net.HiddenWeight = nnet.MakeMatrix(numInputUnits+1, numHiddenUnits)
70 | 
71 | net.InitParam()
72 | return net
73 | }
74 | 
75 | // Dump writes neural network parameters to file in json format.
76 | func (net *NeuralNetwork) Dump(filename string) error {
77 | return nnet.DumpAsJson(filename, net)
78 | }
79 | 
80 | // InitParam performs a heuristic initialization of NN parameters.
81 | func (net *NeuralNetwork) InitParam() {
82 | for i := range net.HiddenWeight {
83 | for j := range net.HiddenWeight[i] {
84 | net.HiddenWeight[i][j] = rand.Float64() - 0.5
85 | }
86 | }
87 | 
88 | for i := range net.OutputWeight {
89 | for j := range net.OutputWeight[i] {
90 | net.OutputWeight[i][j] = rand.Float64() - 0.5
91 | }
92 | }
93 | }
94 | 
95 | // Forward performs the forward pass of the neural network
96 | // and returns the output.
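// Note that the last element of HiddenLayer is clamped to Bias rather than
// computed from the input, so the network effectively has numHiddenUnits-1
// trainable hidden activations plus a bias unit.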
97 | func (net *NeuralNetwork) Forward(input []float64) []float64 {
98 | output := make([]float64, len(net.OutputLayer))
99 | 
100 | if len(input)+1 != len(net.InputLayer) {
101 | panic("Dimension mismatch: unexpected number of units in the input layer")
102 | }
103 | 
104 | // Copy
105 | for i := range input {
106 | net.InputLayer[i] = input[i]
107 | }
108 | net.InputLayer[len(net.InputLayer)-1] = Bias
109 | 
110 | // Transfer to hidden layer from input layer
111 | for i := 0; i < len(net.HiddenLayer)-1; i++ {
112 | sum := 0.0
113 | for j := range net.InputLayer {
114 | sum += net.HiddenWeight[j][i] * net.InputLayer[j]
115 | }
116 | net.HiddenLayer[i] = nnet.Sigmoid(sum)
117 | }
118 | net.HiddenLayer[len(net.HiddenLayer)-1] = Bias
119 | 
120 | // Transfer to output layer from hidden layer
121 | for i := 0; i < len(net.OutputLayer); i++ {
122 | sum := 0.0
123 | for j := range net.HiddenLayer {
124 | sum += net.OutputWeight[j][i] * net.HiddenLayer[j]
125 | }
126 | output[i] = nnet.Sigmoid(sum)
127 | }
128 | net.OutputLayer = output
129 | 
130 | return output
131 | }
132 | 
133 | func (net *NeuralNetwork) ComputeDelta(predicted,
134 | target []float64) ([]float64, []float64) {
135 | outputDelta := make([]float64, len(net.OutputLayer))
136 | hiddenDelta := make([]float64, len(net.HiddenLayer))
137 | 
138 | // Output Delta
139 | for i := 0; i < len(net.OutputLayer); i++ {
140 | outputDelta[i] = (predicted[i] - target[i]) *
141 | nnet.DSigmoid(predicted[i])
142 | }
143 | 
144 | // Hidden Delta
145 | for i := 0; i < len(net.HiddenLayer); i++ {
146 | sum := 0.0
147 | for j := range net.OutputLayer {
148 | sum += net.OutputWeight[i][j] * outputDelta[j]
149 | }
150 | hiddenDelta[i] = sum * nnet.DSigmoid(net.HiddenLayer[i])
151 | }
152 | 
153 | return outputDelta, hiddenDelta
154 | }
155 | 
156 | // Feedback performs a backward transfer algorithm.
157 | func (net *NeuralNetwork) Feedback(predicted, target []float64) {
158 | outputDelta, hiddenDelta := net.ComputeDelta(predicted, target)
159 | 
160 | // Update Weight of Output layer
161 | for i := range net.OutputLayer {
162 | for j := 0; j < len(net.HiddenLayer); j++ {
163 | net.OutputWeight[j][i] -= net.Option.LearningRate *
164 | outputDelta[i] * net.HiddenLayer[j]
165 | }
166 | }
167 | 
168 | // Update Weight of Hidden layer
169 | for i := 0; i < len(net.HiddenLayer); i++ {
170 | for j := range net.InputLayer {
171 | net.HiddenWeight[j][i] -= net.Option.LearningRate *
172 | hiddenDelta[i] * net.InputLayer[j]
173 | }
174 | }
175 | }
176 | 
177 | // Objective returns the objective function given a prediction and its target.
178 | func (net *NeuralNetwork) Objective(predicted, target []float64) float64 {
179 | sum := 0.0
180 | for i := 0; i < len(target); i++ {
181 | sum += (predicted[i] - target[i]) * (predicted[i] - target[i])
182 | }
183 | return 0.5 * sum
184 | }
185 | 
186 | // ObjectiveForAllData returns the mean objective over all data.
187 | func (net *NeuralNetwork) ObjectiveForAllData(input,
188 | target [][]float64) float64 {
189 | sum := 0.0
190 | for i := 0; i < len(input); i++ {
191 | sum += net.Objective(net.Forward(input[i]), target[i])
192 | }
193 | return sum / float64(len(input))
194 | }
195 | 
196 | func (net *NeuralNetwork) ParseTrainingOption(option TrainingOption) error {
197 | net.Option = option
198 | 
199 | if net.Option.Epoches <= 0 {
200 | return errors.New("Epoches must be larger than zero.")
201 | }
202 | if net.Option.LearningRate == 0 {
203 | return errors.New("Learning rate must be specified to train NN.")
204 | }
205 | 
206 | return nil
207 | }
208 | 
209 | // SupervisedSGD performs stochastic gradient descent to optimize the network.
210 | func (net *NeuralNetwork) SupervisedSGD(input [][]float64, target [][]float64) {
211 | for epoch := 0; epoch < net.Option.Epoches; epoch++ {
212 | // Get random sample
213 | randIndex := rand.Intn(len(input))
214 | x := input[randIndex]
215 | t := target[randIndex]
216 | 
217 | // One feed-forward procedure
218 | predicted := net.Forward(x)
219 | net.Feedback(predicted, t)
220 | 
221 | // Print objective function
222 | if net.Option.Monitoring {
223 | fmt.Println(epoch, net.Objective(predicted, t))
224 | }
225 | }
226 | }
227 | 
228 | // Train performs supervised network training.
229 | func (net *NeuralNetwork) Train(input [][]float64,
230 | target [][]float64, option TrainingOption) error {
231 | err := net.ParseTrainingOption(option)
232 | if err != nil {
233 | return err
234 | }
235 | 
236 | // Perform SupervisedSGD
237 | net.SupervisedSGD(input, target)
238 | 
239 | return nil
240 | }
241 | 
-------------------------------------------------------------------------------- /mlp3/mlp3_test.go: --------------------------------------------------------------------------------
1 | package mlp3
2 | 
3 | import (
4 | "math"
5 | "testing"
6 | )
7 | 
8 | // XOR
9 | func TestNN(t *testing.T) {
10 | input := [][]float64{{0, 0}, {0, 1}, {1, 0}, {1, 1}}
11 | target := [][]float64{{0}, {1}, {1}, {0}}
12 | 
13 | network := NewNeuralNetwork(2, 20, 1)
14 | option := TrainingOption{
15 | LearningRate: 0.1,
16 | Epoches: 50000,
17 | Monitoring: false,
18 | }
19 | 
20 | err := network.Train(input, target, option)
21 | if err != nil {
22 | t.Errorf("Train returns error, want no error.")
23 | }
24 | 
25 | // Test
26 | for i, val := range input {
27 | predicted := network.Forward(val)
28 | squaredErr := math.Abs(target[i][0] - predicted[0])
29 | if squaredErr > 0.1 {
30 | t.Errorf("Prediction Error %f, want less than 0.1.", squaredErr)
31 | }
32 | }
33 | }
34 | 
35 | func BenchmarkNN(b *testing.B) {
36 | input := [][]float64{{0, 0}, {0, 1}, {1, 0}, {1, 1}}
37 | target := [][]float64{{0}, {1}, {1}, {0}}
38 | 
39 | network := NewNeuralNetwork(2, 10, 1)
40 | option := TrainingOption{
41 | LearningRate: 0.1,
42 | Epoches: 50000,
43 | Monitoring: false,
44 | }
45 | 
46 | for i := 0; i < b.N; i++ {
47 | network.Train(input, target, option)
48 | }
49 | }
50 | 
-------------------------------------------------------------------------------- /nnet.go: --------------------------------------------------------------------------------
1 | // Package nnet provides basic building blocks for neural network algorithms.
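// It includes activation functions (Sigmoid, Tanh and their derivatives,
// written in terms of the activation's output value), matrix and JSON-dump
// helpers, and the shared mini-batch trainer (train.go) used by the rbm,
// gbrbm and mlp packages.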
2 | package nnet
3 | 
4 | import (
5 | "encoding/json"
6 | "math"
7 | "math/rand"
8 | "os"
9 | )
10 | 
11 | type Forwarder interface {
12 | Forward(input []float64) []float64
13 | }
14 | 
15 | func Test(net Forwarder, input [][]float64) []int {
16 | recognizedLabel := make([]int, len(input))
17 | for i, val := range input {
18 | predicted := net.Forward(val)
19 | recognizedLabel[i] = Argmax(predicted)
20 | }
21 | return recognizedLabel
22 | }
23 | 
24 | func DumpAsJson(filename string, obj interface{}) error {
25 | file, err := os.Create(filename)
26 | if err != nil {
27 | return err
28 | }
29 | defer file.Close()
30 | 
31 | encoder := json.NewEncoder(file)
32 | err = encoder.Encode(obj)
33 | if err != nil {
34 | return err
35 | }
36 | 
37 | return nil
38 | }
39 | 
40 | func Forward(input []float64, W [][]float64, B []float64) []float64 {
41 | numOutputUnits := len(B)
42 | predicted := make([]float64, numOutputUnits)
43 | for i := 0; i < numOutputUnits; i++ {
44 | sum := 0.0
45 | for j := range input {
46 | sum += W[j][i] * input[j]
47 | }
48 | predicted[i] = Sigmoid(sum + B[i])
49 | }
50 | 
51 | return predicted
52 | }
53 | 
54 | func Sigmoid(x float64) float64 {
55 | return 1.0 / (1.0 + math.Exp(-x))
56 | }
57 | 
58 | func DSigmoid(x float64) float64 {
59 | return x * (1.0 - x)
60 | }
61 | 
62 | func Tanh(x float64) float64 {
63 | return math.Tanh(x)
64 | }
65 | 
66 | func DTanh(x float64) float64 {
67 | return 1.0 - math.Pow(x, 2)
68 | }
69 | 
70 | func MakeMatrix(rows, cols int) [][]float64 {
71 | matrix := make([][]float64, rows)
72 | for i := range matrix {
73 | matrix[i] = make([]float64, cols)
74 | }
75 | return matrix
76 | }
77 | 
78 | func NormPDF(x, mu, sigma float64) float64 {
79 | c := math.Sqrt(2.0*math.Pi) * sigma
80 | return 1.0 / c * math.Exp(-0.5*(x-mu)*(x-mu)/(sigma*sigma))
81 | }
82 | 
83 | // SquareErrBetweenTwoVector returns the sum of squared differences ||v1 - v2||^2.
84 | func SquareErrBetweenTwoVector(v1, v2 []float64) float64 {
85 | err := 0.0
86 | for i := range v1 {
87 | err += (v1[i] - v2[i]) * (v1[i] - v2[i])
88 | }
89 | return err
90 | }
91 | 
92 | // Argmax returns the index of the maximum element of A.
93 | func Argmax(A []float64) int {
94 | x := 0
95 | v := -math.MaxFloat64
96 | for i, a := range A {
97 | if a > v {
98 | x = i
99 | v = a
100 | }
101 | }
102 | return x
103 | }
104 | 
105 | func RandomSubset(data [][]float64, numSamples int) [][]float64 {
106 | if len(data) < numSamples {
107 | numSamples = len(data)
108 | }
109 | subset := make([][]float64, numSamples)
110 | for i := 0; i < numSamples; i++ {
111 | randIndex := rand.Intn(len(data))
112 | subset[i] = data[randIndex]
113 | }
114 | return subset
115 | }
116 | 
-------------------------------------------------------------------------------- /rbm/rbm.go: --------------------------------------------------------------------------------
1 | // Package rbm provides support for Restricted Boltzmann Machines.
2 | package rbm
3 | 
4 | import (
5 | "encoding/json"
6 | "github.com/r9y9/nnet" // sigmoid, matrix
7 | "math"
8 | "math/rand"
9 | "os"
10 | "time"
11 | )
12 | 
13 | // References:
14 | // [1] G. Hinton, "A Practical Guide to Training Restricted Boltzmann Machines",
15 | // UTML TR 2010-003.
16 | // url: http://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
17 | //
18 | // [2] A. Fischer and C. Igel. "An introduction to restricted Boltzmann machines",
19 | // Proc. of the 17th Iberoamerican Congress on Pattern Recognition (CIARP),
20 | // Volume 7441 of LNCS, pages 14–36. Springer, 2012
21 | // url: http://image.diku.dk/igel/paper/AItRBM-proof.pdf
22 | //
23 | // [3] Restricted Boltzmann Machines (RBM), DeepLearning tutorial
24 | // url: http://deeplearning.net/tutorial/rbm.html
25 | 
26 | // Notes about implementation:
27 | // Notation used in this code basically follows [2].
28 | // e.g. W for weight, B for bias of visible layer, C for bias of hidden layer.
29 | 
30 | // Graphical representation of Restricted Boltzmann Machines (RBM).
31 | //
32 | // ○ ○ .... ○ h(hidden layer), c(bias)
33 | // /\ /\ / /\
34 | // ○ ○ ○ ... ○ v(visible layer), b(bias)
35 | type RBM struct {
36 | W [][]float64 // Weight
37 | B []float64 // Bias of visible layer
38 | C []float64 // Bias of hidden layer
39 | NumHiddenUnits int
40 | NumVisibleUnits int
41 | PersistentVisibleUnits [][]float64 // used in Persistent contrastive learning
42 | GradW [][]float64
43 | GradB []float64
44 | GradC []float64
45 | Option TrainingOption
46 | }
47 | 
48 | type TrainingOption struct {
49 | LearningRate float64
50 | OrderOfGibbsSampling int // 1 is enough for many cases.
51 | UsePersistent bool
52 | Epoches int
53 | MiniBatchSize int
54 | L2Regularization bool
55 | RegularizationRate float64
56 | Monitoring bool
57 | }
58 | 
59 | // New creates a new RBM instance given the numbers of visible and
60 | // hidden units.
61 | func New(numVisibleUnits, numHiddenUnits int) *RBM {
62 | rbm := new(RBM)
63 | rand.Seed(time.Now().UnixNano())
64 | rbm.NumVisibleUnits = numVisibleUnits
65 | rbm.NumHiddenUnits = numHiddenUnits
66 | rbm.W = nnet.MakeMatrix(numHiddenUnits, numVisibleUnits)
67 | rbm.B = make([]float64, numVisibleUnits)
68 | rbm.C = make([]float64, numHiddenUnits)
69 | rbm.GradW = nnet.MakeMatrix(numHiddenUnits, numVisibleUnits)
70 | rbm.GradB = make([]float64, numVisibleUnits)
71 | rbm.GradC = make([]float64, numHiddenUnits)
72 | rbm.InitParam()
73 | return rbm
74 | }
75 | 
76 | // InitParam performs a heuristic parameter initialization.
77 | func (rbm *RBM) InitParam() {
78 | // Init W
79 | for i := 0; i < rbm.NumHiddenUnits; i++ {
80 | for j := 0; j < rbm.NumVisibleUnits; j++ {
81 | rbm.W[i][j] = 0.01 * rand.NormFloat64()
82 | }
83 | }
84 | // Init B
85 | for j := 0; j < rbm.NumVisibleUnits; j++ {
86 | rbm.B[j] = 0.0
87 | }
88 | // Init C (bias of hidden layer)
89 | for i := 0; i < rbm.NumHiddenUnits; i++ {
90 | rbm.C[i] = 0.0
91 | }
92 | }
93 | 
94 | // Load loads an RBM from a dump file and returns its instance.
95 | func Load(filename string) (*RBM, error) {
96 | file, err := os.Open(filename)
97 | if err != nil {
98 | return nil, err
99 | }
100 | defer file.Close()
101 | 
102 | decoder := json.NewDecoder(file)
103 | rbm := &RBM{}
104 | err = decoder.Decode(rbm)
105 | 
106 | if err != nil {
107 | return nil, err
108 | }
109 | 
110 | return rbm, nil
111 | }
112 | 
113 | // Dump writes RBM parameters to file in json format.
114 | func (rbm *RBM) Dump(filename string) error {
115 | rbm.PersistentVisibleUnits = nil
116 | return nnet.DumpAsJson(filename, rbm)
117 | }
118 | 
119 | // Forward performs activity transformation from visible to hidden layer.
120 | func (rbm *RBM) Forward(v []float64) []float64 {
121 | hidden := make([]float64, rbm.NumHiddenUnits)
122 | for i := 0; i < rbm.NumHiddenUnits; i++ {
123 | hidden[i] = rbm.P_H_Given_V(i, v)
124 | }
125 | return hidden
126 | }
127 | 
128 | // P_H_Given_V returns p(h=1|v), the conditional probability of activation
129 | // of a hidden unit given a set of visible units.
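// Concretely, p(h_i = 1 | v) = sigmoid(sum_j W[i][j]*v[j] + C[i]).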
// Graphical representation of a Restricted Boltzmann Machine (RBM):
//
//	○ ○ .... ○   h (hidden layer), c (bias)
//	/\ /\ ... /\
//	○ ○ ○ ... ○  v (visible layer), b (bias)
type RBM struct {
	W                      [][]float64 // Weights
	B                      []float64   // Bias of the visible layer
	C                      []float64   // Bias of the hidden layer
	NumHiddenUnits         int
	NumVisibleUnits        int
	PersistentVisibleUnits [][]float64 // chain states used in persistent contrastive divergence (PCD)
	GradW                  [][]float64
	GradB                  []float64
	GradC                  []float64
	Option                 TrainingOption
}

type TrainingOption struct {
	LearningRate         float64
	OrderOfGibbsSampling int // the number of Gibbs steps per update; 1 is enough for many cases
	UsePersistent        bool
	Epoches              int
	MiniBatchSize        int
	L2Regularization     bool
	RegularizationRate   float64
	Monitoring           bool
}

// New creates a new RBM instance from the number of visible units and the
// number of hidden units.
func New(numVisibleUnits, numHiddenUnits int) *RBM {
	rbm := new(RBM)
	rand.Seed(time.Now().UnixNano())
	rbm.NumVisibleUnits = numVisibleUnits
	rbm.NumHiddenUnits = numHiddenUnits
	rbm.W = nnet.MakeMatrix(numHiddenUnits, numVisibleUnits)
	rbm.B = make([]float64, numVisibleUnits)
	rbm.C = make([]float64, numHiddenUnits)
	rbm.GradW = nnet.MakeMatrix(numHiddenUnits, numVisibleUnits)
	rbm.GradB = make([]float64, numVisibleUnits)
	rbm.GradC = make([]float64, numHiddenUnits)
	rbm.InitParam()
	return rbm
}

// InitParam performs a heuristic parameter initialization: small Gaussian
// noise for the weights and zeros for both biases.
func (rbm *RBM) InitParam() {
	// Init W
	for i := 0; i < rbm.NumHiddenUnits; i++ {
		for j := 0; j < rbm.NumVisibleUnits; j++ {
			rbm.W[i][j] = 0.01 * rand.NormFloat64()
		}
	}
	// Init B (bias of visible layer)
	for j := 0; j < rbm.NumVisibleUnits; j++ {
		rbm.B[j] = 0.0
	}
	// Init C (bias of hidden layer)
	for i := 0; i < rbm.NumHiddenUnits; i++ {
		rbm.C[i] = 0.0
	}
}

// Load loads an RBM from a dump file and returns its instance.
func Load(filename string) (*RBM, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	decoder := json.NewDecoder(file)
	rbm := &RBM{}
	if err := decoder.Decode(rbm); err != nil {
		return nil, err
	}

	return rbm, nil
}

// Dump writes the RBM parameters to a file in JSON format. The persistent
// chains are transient training state, so they are dropped before dumping.
func (rbm *RBM) Dump(filename string) error {
	rbm.PersistentVisibleUnits = nil
	return nnet.DumpAsJson(filename, rbm)
}
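// Illustrative sketch (not in the original file): Dump and Load round-trip,
// so a trained model can be persisted and restored later. The filename and
// the visible vector are made up for the example:
//
//	r := New(784, 400)
//	// ... train r ...
//	if err := r.Dump("rbm.json"); err != nil {
//		log.Fatal(err)
//	}
//	restored, err := Load("rbm.json")
//	if err != nil {
//		log.Fatal(err)
//	}
//	hidden := restored.Forward(someVisibleVector) // someVisibleVector: []float64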
// Forward performs the activity transformation from the visible layer to
// the hidden layer, i.e. it returns p(h=1|v) for each hidden unit.
func (rbm *RBM) Forward(v []float64) []float64 {
	hidden := make([]float64, rbm.NumHiddenUnits)
	for i := 0; i < rbm.NumHiddenUnits; i++ {
		hidden[i] = rbm.P_H_Given_V(i, v)
	}
	return hidden
}

// P_H_Given_V returns p(h=1|v), the conditional probability of activation
// of a hidden unit given a set of visible units.
func (rbm *RBM) P_H_Given_V(hiddenIndex int, v []float64) float64 {
	sum := 0.0
	for j := 0; j < rbm.NumVisibleUnits; j++ {
		sum += rbm.W[hiddenIndex][j] * v[j]
	}
	return nnet.Sigmoid(sum + rbm.C[hiddenIndex])
}

// P_V_Given_H returns p(v=1|h), the conditional probability of activation
// of a visible unit given a set of hidden units.
func (rbm *RBM) P_V_Given_H(visibleIndex int, h []float64) float64 {
	sum := 0.0
	for i := 0; i < rbm.NumHiddenUnits; i++ {
		sum += rbm.W[i][visibleIndex] * h[i]
	}
	return nnet.Sigmoid(sum + rbm.B[visibleIndex])
}

// Reconstruct performs reconstruction based on Gibbs sampling, where
// numSteps is the number of sampling iterations. It returns the sampled
// visible state and the visible activation probabilities from the last step.
func (rbm *RBM) Reconstruct(v []float64, numSteps int) ([]float64, []float64) {
	// The initial state is set to the input visible units.
	reconstructedVisible := make([]float64, len(v))
	copy(reconstructedVisible, v)
	reconstructedProb := make([]float64, len(v))

	// Perform Gibbs sampling
	for t := 0; t < numSteps; t++ {
		// 1. sample hidden units given the visible units
		hiddenState := make([]float64, rbm.NumHiddenUnits)
		for i := 0; i < rbm.NumHiddenUnits; i++ {
			p := rbm.P_H_Given_V(i, reconstructedVisible)
			if p > rand.Float64() {
				hiddenState[i] = 1.0
			} else {
				hiddenState[i] = 0.0
			}
		}
		// 2. sample visible units given the hidden units
		for j := 0; j < rbm.NumVisibleUnits; j++ {
			p := rbm.P_V_Given_H(j, hiddenState)
			if p > rand.Float64() {
				reconstructedVisible[j] = 1.0
			} else {
				reconstructedVisible[j] = 0.0
			}
			// keep the activation probability
			reconstructedProb[j] = p
		}
	}

	return reconstructedVisible, reconstructedProb
}

// ReconstructionError returns the mean squared reconstruction error over
// the dataset, computed against the reconstructed visible probabilities.
func (rbm *RBM) ReconstructionError(data [][]float64, numSteps int) float64 {
	err := 0.0
	for _, v := range data {
		_, reconstructed := rbm.Reconstruct(v, numSteps)
		err += nnet.SquareErrBetweenTwoVector(v, reconstructed)
	}
	return 0.5 * err / float64(len(data))
}

// flip returns a copy of x with the bit-th element flipped (0 <-> 1).
func flip(x []float64, bit int) []float64 {
	y := make([]float64, len(x))
	copy(y, x)
	y[bit] = 1.0 - x[bit]
	return y
}

// FreeEnergy returns F(v), the free energy of the RBM given a visible
// vector v. refs: eq. (25) in [1].
func (rbm *RBM) FreeEnergy(v []float64) float64 {
	energy := 0.0

	for j := 0; j < rbm.NumVisibleUnits; j++ {
		energy -= rbm.B[j] * v[j]
	}

	for i := 0; i < rbm.NumHiddenUnits; i++ {
		sum := rbm.C[i]
		for j := 0; j < rbm.NumVisibleUnits; j++ {
			sum += rbm.W[i][j] * v[j]
		}
		energy -= math.Log(1 + math.Exp(sum))
	}

	return energy
}
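// The pseudo log-likelihood below uses the stochastic one-bit approximation
// from [3]: pick a random index i of v and let ṽ be v with bit i flipped;
// then
//
//	log PL(v) ≈ N * log p(v[i] | v[-i]) = N * log sigmoid(F(ṽ) - F(v))
//
// where N is the number of visible units and F is the free energy above.
// Flipping a single bit keeps the estimate cheap; the exact likelihood
// would require a sum over all 2^N visible configurations.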
// PseudoLogLikelihoodForOneSample returns the pseudo log-likelihood for a
// given input sample.
func (rbm *RBM) PseudoLogLikelihoodForOneSample(v []float64) float64 {
	bitIndex := rand.Intn(len(v))
	fe := rbm.FreeEnergy(v)
	feFlip := rbm.FreeEnergy(flip(v, bitIndex))
	cost := float64(rbm.NumVisibleUnits) * math.Log(nnet.Sigmoid(feFlip-fe))
	return cost
}

// PseudoLogLikelihood returns the mean pseudo log-likelihood over a given
// set of data.
func (rbm *RBM) PseudoLogLikelihood(data [][]float64) float64 {
	sum := 0.0
	for i := range data {
		sum += rbm.PseudoLogLikelihoodForOneSample(data[i])
	}
	return sum / float64(len(data))
}

// UnSupervisedObjective evaluates the pseudo log-likelihood on a random
// subset of the data (at most 3000 samples) to keep monitoring cheap.
func (rbm *RBM) UnSupervisedObjective(data [][]float64) float64 {
	size := 3000
	if size > len(data) {
		size = len(data)
	}
	subset := nnet.RandomSubset(data, size)
	return rbm.PseudoLogLikelihood(subset)
	// Alternative:
	// return rbm.ReconstructionError(subset, rbm.Option.OrderOfGibbsSampling)
}

// Gradient returns the gradients of the RBM parameters for a given
// (mini-batch) dataset.
func (rbm *RBM) Gradient(data [][]float64,
	miniBatchIndex int) ([][]float64, []float64, []float64) {
	gradW := nnet.MakeMatrix(rbm.NumHiddenUnits, rbm.NumVisibleUnits)
	gradB := make([]float64, rbm.NumVisibleUnits)
	gradC := make([]float64, rbm.NumHiddenUnits)

	for n, v := range data {
		// Set the start state of Gibbs sampling
		var gibbsStart []float64
		persistentIndex := n + miniBatchIndex*rbm.Option.MiniBatchSize
		if rbm.Option.UsePersistent {
			gibbsStart = rbm.PersistentVisibleUnits[persistentIndex]
		} else {
			gibbsStart = v
		}

		// Perform reconstruction using Gibbs sampling
		reconstructedVisible, _ := rbm.Reconstruct(gibbsStart,
			rbm.Option.OrderOfGibbsSampling)

		// Keep the reconstructed visible units as the new chain state
		if rbm.Option.UsePersistent {
			rbm.PersistentVisibleUnits[persistentIndex] =
				reconstructedVisible
		}

		// Pre-computation used in the gradient computation below
		p_h_given_v1 := make([]float64, rbm.NumHiddenUnits)
		p_h_given_v2 := make([]float64, rbm.NumHiddenUnits)
		for i := 0; i < rbm.NumHiddenUnits; i++ {
			p_h_given_v1[i] = rbm.P_H_Given_V(i, v)
			p_h_given_v2[i] = rbm.P_H_Given_V(i, reconstructedVisible)
		}

		// Compute gradient of W
		for i := 0; i < rbm.NumHiddenUnits; i++ {
			for j := 0; j < rbm.NumVisibleUnits; j++ {
				gradW[i][j] += p_h_given_v1[i]*v[j] -
					p_h_given_v2[i]*reconstructedVisible[j]
			}
		}

		// Compute gradient of B
		for j := 0; j < rbm.NumVisibleUnits; j++ {
			gradB[j] += v[j] - reconstructedVisible[j]
		}

		// Compute gradient of C
		for i := 0; i < rbm.NumHiddenUnits; i++ {
			gradC[i] += p_h_given_v1[i] - p_h_given_v2[i]
		}
	}

	// Normalize by the size of the mini-batch
	for i := 0; i < rbm.NumHiddenUnits; i++ {
		for j := 0; j < rbm.NumVisibleUnits; j++ {
			gradW[i][j] /= float64(len(data))
		}
	}

	for j := 0; j < rbm.NumVisibleUnits; j++ {
		gradB[j] /= float64(len(data))
	}

	for i := 0; i < rbm.NumHiddenUnits; i++ {
		gradC[i] /= float64(len(data))
	}

	return gradW, gradB, gradC
}

// UnSupervisedMiniBatchUpdate updates the RBM parameters using the
// gradients estimated from one mini-batch.
func (rbm *RBM) UnSupervisedMiniBatchUpdate(batch [][]float64,
	epoch, miniBatchIndex int) {
	gradW, gradB, gradC := rbm.Gradient(batch, miniBatchIndex)

	// TODO: enable momentum. A common schedule from [1] is 0.5 for the
	// first few epochs and 0.9 afterwards; it is disabled (0.0) for now.
	momentum := 0.0

	// Update W
	for i := 0; i < rbm.NumHiddenUnits; i++ {
		for j := 0; j < rbm.NumVisibleUnits; j++ {
			grad := momentum*rbm.GradW[i][j] +
				rbm.Option.LearningRate*gradW[i][j]
			rbm.W[i][j] += grad
			if rbm.Option.L2Regularization {
				rbm.W[i][j] *=
					(1.0 - rbm.Option.RegularizationRate)
			}
			rbm.GradW[i][j] = grad
		}
	}

	// Update B
	for j := 0; j < rbm.NumVisibleUnits; j++ {
		grad := momentum*rbm.GradB[j] + rbm.Option.LearningRate*gradB[j]
		rbm.B[j] += grad
		rbm.GradB[j] = grad
	}

	// Update C
	for i := 0; i < rbm.NumHiddenUnits; i++ {
		grad := momentum*rbm.GradC[i] + rbm.Option.LearningRate*gradC[i]
		rbm.C[i] += grad
		rbm.GradC[i] = grad
	}
}

// Train performs contrastive divergence (CD) learning. The algorithm is
// based on (mini-batch) stochastic gradient ascent.
func (rbm *RBM) Train(data [][]float64, option TrainingOption) error {
	rbm.Option = option
	opt := nnet.BaseTrainingOption{
		Epoches:       rbm.Option.Epoches,
		MiniBatchSize: rbm.Option.MiniBatchSize,
		Monitoring:    rbm.Option.Monitoring,
	}

	// Persistent contrastive divergence (PCD): keep one Gibbs chain per
	// training sample across updates. The rows initially alias data;
	// Gradient replaces rows rather than mutating them, so the training
	// data itself is never modified.
	if rbm.Option.UsePersistent {
		rbm.PersistentVisibleUnits =
			nnet.MakeMatrix(len(data), len(data[0]))
		copy(rbm.PersistentVisibleUnits, data)
	}

	s := nnet.NewTrainer(opt)
	return s.UnSupervisedMiniBatchTrain(rbm, data)
}
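// Illustrative training sketch (not in the original file); the
// hyperparameters mirror the README example and are not tuned:
//
//	r := New(numVisibleUnits, numHiddenUnits)
//	option := TrainingOption{
//		LearningRate:         0.1,
//		Epoches:              5,
//		OrderOfGibbsSampling: 1,
//		MiniBatchSize:        20,
//		UsePersistent:        true,
//	}
//	if err := r.Train(data, option); err != nil {
//		log.Fatal(err)
//	}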
--------------------------------------------------------------------------------
/rbm/rbm_test.go:
--------------------------------------------------------------------------------
package rbm

import (
	"math"
	"math/rand"
	"testing"
	"time"
)

// createDummyData returns a two-cluster toy dataset: the first half is
// centered around (0.1, 0.1), the second half around (0.7, 0.7).
func createDummyData(size int) [][]float64 {
	rand.Seed(time.Now().UnixNano())
	data := make([][]float64, size)
	for i := 0; i < size/2; i++ {
		sample := make([]float64, 2)
		sample[0] = math.Abs(rand.NormFloat64()*0.1 + 0.1)
		sample[1] = math.Abs(rand.NormFloat64()*0.1 + 0.1)
		data[i] = sample
	}

	for i := size / 2; i < size; i++ {
		sample := make([]float64, 2)
		sample[0] = rand.NormFloat64()*0.1 + 0.7
		sample[1] = rand.NormFloat64()*0.1 + 0.7

		data[i] = sample
	}

	return data
}

func TestRBM(t *testing.T) {
	data := createDummyData(1000)

	// RBM Training
	numVisibleUnits := 2
	numHiddenUnits := 2
	r := New(numVisibleUnits, numHiddenUnits)
	option := TrainingOption{
		LearningRate:         0.1,
		Epoches:              1000,
		OrderOfGibbsSampling: 1,
		MiniBatchSize:        20,
		L2Regularization:     true,
		RegularizationRate:   1.0e-10,
		Monitoring:           false,
	}

	err := r.Train(data, option)
	if err != nil {
		t.Errorf("Train returned %v, want nil.", err)
	}
}

func BenchmarkRBM(b *testing.B) {
	data := createDummyData(1000)

	// RBM Training
	numVisibleUnits := 2
	numHiddenUnits := 2
	r := New(numVisibleUnits, numHiddenUnits)
	option := TrainingOption{
		LearningRate:         0.1,
		Epoches:              10,
		OrderOfGibbsSampling: 1,
		MiniBatchSize:        20,
		L2Regularization:     false,
		RegularizationRate:   0.0001,
		Monitoring:           false,
	}

	for i := 0; i < b.N; i++ {
		r.Train(data, option)
	}
}
--------------------------------------------------------------------------------
/train.go:
--------------------------------------------------------------------------------
package nnet

import (
	"errors"
	"fmt"
)

// SupervisedObjectiver is an interface that provides an objective function
// for supervised training.
type SupervisedObjectiver interface {
	SupervisedObjective(input, target [][]float64) float64
}

// SupervisedOnlineUpdater is the interface required for supervised
// online (sample-by-sample) training.
type SupervisedOnlineUpdater interface {
	SupervisedOnlineUpdate(input, target []float64)
	SupervisedObjectiver
}

// SupervisedMiniBatchUpdater is the interface required for supervised
// mini-batch training.
type SupervisedMiniBatchUpdater interface {
	SupervisedMiniBatchUpdate(input, target [][]float64)
	SupervisedObjectiver
}

// UnSupervisedObjectiver is an interface that provides an objective function
// for unsupervised training.
type UnSupervisedObjectiver interface {
	UnSupervisedObjective(input [][]float64) float64
}

// UnSupervisedOnlineUpdater is the interface required for unsupervised
// online training.
type UnSupervisedOnlineUpdater interface {
	UnSupervisedOnlineUpdate(input []float64)
	UnSupervisedObjectiver
}

// UnSupervisedMiniBatchUpdater is the interface required for unsupervised
// mini-batch training.
type UnSupervisedMiniBatchUpdater interface {
	UnSupervisedMiniBatchUpdate(input [][]float64, epoch,
		miniBatchIndex int)
	UnSupervisedObjectiver
}

// Trainer runs the training loops defined below.
type Trainer struct {
	Option BaseTrainingOption
}

type BaseTrainingOption struct {
	Epoches       int
	MiniBatchSize int // not used in standard (online) SGD
	Monitoring    bool
}

// NewTrainer creates a new Trainer from a training option.
func NewTrainer(option BaseTrainingOption) *Trainer {
	t := new(Trainer)
	t.Option = option
	return t
}

// ParseTrainingOption sets and validates the training option.
func (t *Trainer) ParseTrainingOption(option BaseTrainingOption) error {
	t.Option = option

	if t.Option.MiniBatchSize <= 0 {
		return errors.New("mini-batch size must be larger than zero")
	}
	if t.Option.Epoches <= 0 {
		return errors.New("number of epochs must be larger than zero")
	}

	return nil
}

// SupervisedOnlineTrain trains u sample-by-sample.
func (t *Trainer) SupervisedOnlineTrain(u SupervisedOnlineUpdater,
	input, target [][]float64) error {
	for epoch := 0; epoch < t.Option.Epoches; epoch++ {
		for m := 0; m < len(input); m++ {
			u.SupervisedOnlineUpdate(input[m], target[m])
		}
		if t.Option.Monitoring {
			fmt.Println(epoch, u.SupervisedObjective(input, target))
		}
	}
	return nil
}

// SupervisedMiniBatchTrain trains u on successive mini-batches. Trailing
// samples that do not fill a complete mini-batch are ignored.
func (t *Trainer) SupervisedMiniBatchTrain(u SupervisedMiniBatchUpdater,
	input, target [][]float64) error {
	numMiniBatches := len(input) / t.Option.MiniBatchSize
	for epoch := 0; epoch < t.Option.Epoches; epoch++ {
		for m := 0; m < numMiniBatches; m++ {
			b := m * t.Option.MiniBatchSize
			e := (m + 1) * t.Option.MiniBatchSize
			u.SupervisedMiniBatchUpdate(input[b:e], target[b:e])
		}
		if t.Option.Monitoring {
			fmt.Println(epoch, u.SupervisedObjective(input, target))
		}
	}
	return nil
}

// UnSupervisedOnlineTrain trains u sample-by-sample.
func (t *Trainer) UnSupervisedOnlineTrain(u UnSupervisedOnlineUpdater,
	input [][]float64) error {
	for epoch := 0; epoch < t.Option.Epoches; epoch++ {
		for m := 0; m < len(input); m++ {
			u.UnSupervisedOnlineUpdate(input[m])
		}
		if t.Option.Monitoring {
			fmt.Println(epoch, u.UnSupervisedObjective(input))
		}
	}
	return nil
}

// UnSupervisedMiniBatchTrain trains u on successive mini-batches. Trailing
// samples that do not fill a complete mini-batch are ignored.
func (t *Trainer) UnSupervisedMiniBatchTrain(u UnSupervisedMiniBatchUpdater,
	input [][]float64) error {
	numMiniBatches := len(input) / t.Option.MiniBatchSize
	for epoch := 0; epoch < t.Option.Epoches; epoch++ {
		for m := 0; m < numMiniBatches; m++ {
			b := m * t.Option.MiniBatchSize
			e := (m + 1) * t.Option.MiniBatchSize
			u.UnSupervisedMiniBatchUpdate(input[b:e], epoch, m)
		}
		if t.Option.Monitoring {
			fmt.Println(epoch, u.UnSupervisedObjective(input))
		}
	}
	return nil
}
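// Illustrative sketch (not part of the original file): any model can plug
// into Trainer by implementing the updater interfaces above. The toy
// "mean estimator" below is made up for the example; it nudges its state
// toward each sample and reports the negative mean squared distance as
// the objective to maximize.
//
//	type MeanModel struct{ Mu []float64 }
//
//	func (m *MeanModel) UnSupervisedMiniBatchUpdate(input [][]float64,
//		epoch, miniBatchIndex int) {
//		for _, x := range input {
//			for j := range m.Mu {
//				m.Mu[j] += 0.01 * (x[j] - m.Mu[j])
//			}
//		}
//	}
//
//	func (m *MeanModel) UnSupervisedObjective(input [][]float64) float64 {
//		obj := 0.0
//		for _, x := range input {
//			for j := range m.Mu {
//				d := x[j] - m.Mu[j]
//				obj -= d * d
//			}
//		}
//		return obj / float64(len(input))
//	}
//
// Usage:
//
//	t := NewTrainer(BaseTrainingOption{Epoches: 10, MiniBatchSize: 20})
//	err := t.UnSupervisedMiniBatchTrain(&MeanModel{Mu: make([]float64, dim)}, data)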
--------------------------------------------------------------------------------