├── README.md ├── matlab ├── 2layer │ ├── README.md │ ├── XOR_start.m │ ├── mse.png │ ├── nnEval.m │ ├── nnInit.m │ ├── nnTrain.m │ └── surface.png └── readme ├── tensorflow ├── README.txt ├── XOR.py ├── mnist_cnn.py ├── mnist_mlp.py └── mnist_mlp_essential.py └── torch ├── README ├── pnn ├── datasetCreation.lua ├── pnn.lua └── training.lua └── xor ├── README.md ├── data ├── data.lua └── includes.lua ├── plots ├── includes.lua └── plots.lua ├── regularization ├── WeightDecayWrapper.lua └── includes.lua └── xor.lua /README.md: -------------------------------------------------------------------------------- 1 | # First-Day-At-Lab 2 | 3 | The manual is available at http://arxiv.org/abs/1703.05298. 4 | 5 | 6 | 7 | If you find it useful for your research, please cite it as: 8 | 9 | @misc{neural_networks_for_beginners, 10 | 11 | Author = {Francesco Giannini and Vincenzo Laveglia and Alessandro Rossi and Dario Zanca and Andrea Zugarini}, 12 | 13 | Title = {Neural Networks for Beginners. A fast implementation in Matlab, Torch, TensorFlow}, 14 | 15 | Year = {2017}, 16 | 17 | Eprint = {arXiv:1703.05298}, 18 | 19 | } 20 | -------------------------------------------------------------------------------- /matlab/2layer/README.md: -------------------------------------------------------------------------------- 1 | # Setting up the XOR problem 2 | 3 | In this repository we show our simple implementation of a 2-layer ANN, to give you some tips in case you need (or would like) to design your own model. The code we present is basic and can easily be improved, but we keep it simple to highlight the fundamental steps. In back-propagation we avoid for loops by exploiting Matlab's efficiency with matrix operations. This is a key point and can substantially affect the running time for large datasets. 4 | 5 | ## Data definition 6 | The classification task on the boolean function XOR is a common, not linearly separable, instructional Machine Learning problem. Input data can be defined as a matrix: 7 | ```matlab 8 | >> X = [0, 0, 1, 1; 0, 1, 0, 1] 9 | 10 | X = 11 | 12 | 0 0 1 1 13 | 0 1 0 1 14 | ``` 15 | To which we can assign the target: 16 | ```matlab 17 | >> Y = [0, 1, 1, 0] 18 | 19 | Y = 20 | 21 | 0 1 1 0 22 | ``` 23 | 24 | 25 | ## Network initialization 26 | 27 | In our case, for example, we save the variables of the network in a structure (named `nn`) for which we define three functions. [nnInit.m](https://github.com/alered87/First-Day-at-AI-lab/blob/master/matlab/2layer/nnInit.m) randomly initializes the weights and biases of the hidden (wH, bH) and the output (wO, bO) layers. Each input sample is assumed to be a column vector, so the weights are organized in matrices with as many rows as the number of units in the layer (the desired number of hidden units for the hidden layer, the output size for the output layer) and as many columns as the dimension of the layer's input (the data dimension for the hidden layer, the number of hidden units for the output layer). We can create a network for the XOR data by passing the number of hidden units (10 in this case), the input dimension and the output dimension: 28 | ```matlab 29 | nn = nnInit(10,2,1) 30 | ``` 31 | 32 | The prediction of the network on data can be evaluated by [nnEval.m](https://github.com/alered87/First-Day-at-AI-lab/blob/master/matlab/2layer/nnEval.m). This reduces to simple matrix multiplications, since the built-in Matlab functions used to compute the transfer functions (e.g. 
_logsig_, _tanh_, etc.) are vectorized (for example, we implement the _ReLu_ activation by `max(A,0)`). The function [nnTrain.m](https://github.com/alered87/First-Day-at-AI-lab/blob/master/matlab/2layer/nnTrain.m) performs the training of the network on the given input data and targets. We set up the training phase in an on-line setting via for loops, but it is straightforward to implement batch training with matrix multiplications. To start the training we have to provide as arguments the structure containing the variables (`nn`), the input data `X` and its targets `Y`, the desired number of training epochs and the learning rate, obtaining the trained structure as output: 33 | 34 | 35 | ```matlab 36 | nn = nnTrain(nn,X,Y,1000,0.001) 37 | ``` 38 | We can visualize the performance of the network during training, in terms of the penalty measured on the data, by 39 | 40 | ```matlab 41 | plot(nn.Loss); 42 | xlabel('Epochs of Training','FontSize',14) 43 | ylabel('MSE','FontSize',14) 44 | title('Training Error','FontSize',16) 45 | ``` 46 | 47 | ![](mse.png) 48 | 49 | and, to get an idea of the prediction of the network over the input space, we can plot the separation surfaces by evaluating the network on a grid of points: 50 | 51 | ```matlab 52 | bound = [-.5,2;-1,1.5]; % axis bounds 53 | step = .3; % grid step between evaluation points 54 | 55 | X = [ 0 0; 0 1; 1 0; 1 1]'; % input matrix 56 | Y = [0 1 1 0]; % target matrix 57 | 58 | % generating space grid 59 | [xp1,xp2] = meshgrid(bound(1,1):step:bound(1,2),bound(2,1):step:bound(2,2)); 60 | 61 | % evaluation on space grid 62 | f = zeros(size(xp1)); 63 | for i=1:size(xp1,1) 64 | for j=1:size(xp1,2) 65 | nn = nnEval(nn,[xp1(i,j);xp2(i,j)]); 66 | f(i,j) = nn.o; 67 | 68 | end 69 | end 70 | 71 | pcolor(xp1,xp2,f); % plot of evaluation color 72 | shading interp; % removing gridding from plot 73 | colormap(jet); % setting colormap 74 | hold on; 75 | contour(xp1,xp2,f,[.5,.5],'LineWidth',2,'Color','k'); % drawing separation curve 76 | % drawing data points 77 | scatter(X(1,[1,4]),X(2,[1,4]),200,'o','filled','MarkerEdgeColor','k','MarkerFaceColor','w','LineWidth',2); 78 | scatter(X(1,[2,3]),X(2,[2,3]),200,'d','filled','MarkerEdgeColor','k','MarkerFaceColor','w','LineWidth',2); 79 | % labeling data points 80 | c = {'X_1','X_2','X_3','X_4'}; 81 | dx = [-.15, -.15, .1, .1]; 82 | dy = [-.1, .1, -.1, .1]; 83 | text(X(1,:)+dx, X(2,:)+dy, c, 'FontSize',14); 84 | colorbar; 85 | 86 | % plot labels 87 | xlabel('X_1','FontSize',14) 88 | ylabel('X_2','FontSize',14) 89 | 90 | title('Separation Surfaces','FontSize',16); 91 | h = legend({'Prediction','Classes Bound','Class 0','Class 1'},'Location','SouthEast'); 92 | set(h,'FontSize',14); 93 | ``` 94 | ![](surface.png) 95 | -------------------------------------------------------------------------------- /matlab/2layer/XOR_start.m: -------------------------------------------------------------------------------- 1 | 2 | % Quick start on XOR data 3 | 4 | X = [0 0;0 1;1 0;1 1]'; % input matrix 5 | Y = [0 1 1 0]; % target matrix 6 | % uncomment to train on fuzzy distribution 7 | % X = rand( 2, 100 ); 8 | % Y = ( X(1,:)-.5 ).*( X(2,:)-.5 ) < 0 ; 9 | 10 | % model variables 11 | learningRate = 1e-3; 12 | epochsOfTraining = 2.5e4; 13 | hiddenUnits = 5; 14 | inputSize = size(X,1); 15 | outputSize = size(Y,1); 16 | 17 | % initialization 18 | n = nnInit(hiddenUnits,inputSize,outputSize); 19 | 20 | % training 21 | n = nnTrain(n,X,Y,epochsOfTraining,learningRate); 22 | 23 | 24 | 25 | % % % % % plot Loss of network in 
time 26 | 27 | figure; 28 | 29 | % plot Loss of network in time 30 | subplot(1,2,1); 31 | plot(n.Loss); 32 | xlabel('Epochs of Training','FontSize',14) 33 | ylabel('MSE','FontSize',14) 34 | title('Training Error','FontSize',16) 35 | 36 | 37 | % plot separation surface 38 | subplot(1,2,2); 39 | 40 | bound = [-.5,2;-1,1.5]; % axis bound 41 | step = .3; % number of evaluation point 42 | 43 | % generating space grid 44 | [xp1,xp2] = meshgrid(bound(1,1):step:bound(1,2),bound(2,1):step:bound(2,2)); 45 | 46 | % evaluation on space grid 47 | f = zeros(size(xp1)); 48 | for i=1:size(xp1,1) 49 | for j=1:size(xp1,2) 50 | n = nnEval(n,[xp1(i,j);xp2(i,j)]); 51 | f(i,j) = n.o; 52 | end 53 | end 54 | 55 | pcolor(xp1,xp2,f); % plot of evaluation color 56 | shading interp; % removing gridding from plot 57 | colormap(jet); % setting colormap 58 | hold on; 59 | contour(xp1,xp2,f,[.5,.5],'LineWidth',2,'Color','k'); % drawing separation curve 60 | % drawing data points 61 | X = [0 0;0 1;1 0;1 1]'; % input matrix 62 | Y = [0 1 1 0]; % target matrix 63 | scatter(X(1,[1,4]),X(2,[1,4]),200,'o','filled','MarkerEdgeColor','k','MarkerFaceColor','w'); 64 | scatter(X(1,[2,3]),X(2,[2,3]),200,'d','filled','MarkerEdgeColor','k','MarkerFaceColor','w'); 65 | % labeling data points 66 | c = {'X_1','X_2','X_3','X_4'}; 67 | dx = [-.15, -.15, .1, .1]; 68 | dy = [-.1, .1, -.1, .1]; 69 | text(X(1,:)+dx, X(2,:)+dy, c, 'FontSize',14); 70 | colorbar; 71 | 72 | % plot labels 73 | xlabel('X_1','FontSize',14) 74 | ylabel('X_2','FontSize',14) 75 | 76 | title('Separation Surfaces','FontSize',16); 77 | h = legend({'Prediction','Classes Bound','Class 0','Class 1'},'Location','SouthEast'); 78 | set(h,'FontSize',14); 79 | -------------------------------------------------------------------------------- /matlab/2layer/mse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AILabUSiena/NeuralNetworksForBeginners/e0ebbad3da296a5195983510e99538c14dd789ca/matlab/2layer/mse.png -------------------------------------------------------------------------------- /matlab/2layer/nnEval.m: -------------------------------------------------------------------------------- 1 | function nn = nnEval(nn,X) 2 | % nnEval : compute values of neurons 'a' and its activation 'z' for hidden 3 | % and output layers of the 2-layer Neural Network structure 'nn' 4 | % on the provided input 'X' 5 | % 6 | % nn = nnEval(nn,X) 7 | % 8 | % nn : structure containing variables for a 2-layer Neural Network, 9 | % assuming ReLu as activation and linear output 10 | % X : (input_size)-by-(number_of_samples) data matrix 11 | 12 | nn.aH = nn.wH * X + repmat(nn.bH,1,size(X,2)); % hidden connection computing 13 | 14 | nn.zH = max(nn.aH,0); % activation 15 | 16 | nn.o = nn.wO * nn.zH + repmat(nn.bO,1,size(X,2)); % output connection computing 17 | 18 | end 19 | 20 | -------------------------------------------------------------------------------- /matlab/2layer/nnInit.m: -------------------------------------------------------------------------------- 1 | function nn = nnInit(hiddenLayerSize,inputSize,outputSize) 2 | % nnInit : initialization of the variable of a 2-layers neural network 3 | % 4 | % nn = nnInit(hiddenLayerSize,inputSize,outputSize) 5 | % 6 | % hiddenLayersSize : number of hidden units for the hidden layer 7 | % inputSize : input space dimension 8 | % outputSize : output space dimension 9 | 10 | nn.wH = rand(hiddenLayerSize,inputSize)*0.5-0.2; % hidden layer weights 11 | nn.bH = rand(hiddenLayerSize,1)*0.5-0.2; 
% hidden layer bias 12 | 13 | nn.wO = rand(outputSize,hiddenLayerSize)*0.5-0.2; % output layer weights 14 | nn.bO = rand(outputSize,1)*0.5-0.2; % output layer bias 15 | 16 | nn.Loss = []; 17 | nn.Accuracy = []; 18 | 19 | -------------------------------------------------------------------------------- /matlab/2layer/nnTrain.m: -------------------------------------------------------------------------------- 1 | function nn = nnTrain(nn,X,Y,maxEpochs,eta) 2 | % nnTrain : perform an on-line neural network training on the structure 3 | % 'nn' on the input X with target 'Y' 4 | % 5 | % nn = nnTrain(nn,X,Y,maxEpochs) 6 | % 7 | % nn : structure containing variables to implement a simple Neural 8 | % Network (see 'nnInit','nnEval' for variables explanation) 9 | % X : (input_size)-by-(number_of_samples) data matrix 10 | % Y : (output_size)-by-(number_of_samples) target matrix for X 11 | % maxEpochs : max number of training epochs 12 | % eta : learning rate 13 | 14 | if size(Y,1)>1 15 | labels = vec2ind(Y); % labels for multi-class accuracy 16 | else 17 | cT = (max(Y)+min(Y))/2; % classes threshold for 2-class accuracy 18 | labels = Y>cT; 19 | end 20 | 21 | i = 1; 22 | 23 | while i <= maxEpochs 24 | 25 | for j = 1 : size(X,2) 26 | 27 | % forward propagation 28 | nn = nnEval(nn,X(:,j)); 29 | 30 | % Square Error derivative evaluation 31 | delta = nn.o - Y(:,j); 32 | 33 | % gradients computing 34 | % dE/dwO = (dE/do)*(do/dwO) 35 | nn.D_wO = delta * nn.zH'; % output layer weights 36 | % dE/dbO = (dE/do)*(do/dbO) (do/dbO = 1) 37 | nn.D_bO = delta; % output layer bias 38 | % back-propagation 39 | % dE/dbH = (dE/do)*(do/dzH)*(dzH/daH)*(daH/dbH) , (daH/dbH = 1) 40 | nn.D_bH = (nn.wO'*delta).*(nn.aH>0); % hidden layer bias 41 | % dE/dwH = (dE/do)*(do/dzH)*(dzH/daH)*(daH/dwH) 42 | nn.D_wH = nn.D_bH * X(:,j)' ; % hidden layer weights 43 | 44 | % updating 45 | nn.wO = nn.wO - eta*nn.D_wO; 46 | nn.bO = nn.bO - eta*nn.D_bO; 47 | nn.wH = nn.wH - eta*nn.D_wH; 48 | nn.bH = nn.bH - eta*nn.D_bH; 49 | 50 | end 51 | 52 | % error evaluating 53 | nn = nnEval(nn,X); 54 | MSE = 0.5*mean(mean((nn.o-Y).^2,1),2); 55 | nn.Loss = [nn.Loss,MSE]; 56 | % classification accuracy 57 | if size(Y,1)>1 58 | Accuracy = mean(vec2ind(nn.o) == labels); 59 | else 60 | Accuracy = mean((nn.o>cT)==labels); 61 | end 62 | nn.Accuracy = [nn.Accuracy,Accuracy]; 63 | 64 | fprintf('Epoch of training: %i/%i - Error: %f \n',i,maxEpochs,nn.Loss(end)); 65 | 66 | if (nn.Loss(end)<1e-4)&&(nn.Accuracy(end)>0.99) % stopping criterion 67 | break; 68 | end 69 | 70 | i = i + 1; 71 | 72 | end 73 | 74 | 75 | 76 | end 77 | -------------------------------------------------------------------------------- /matlab/2layer/surface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AILabUSiena/NeuralNetworksForBeginners/e0ebbad3da296a5195983510e99538c14dd789ca/matlab/2layer/surface.png -------------------------------------------------------------------------------- /matlab/readme: -------------------------------------------------------------------------------- 1 | In this repository we present some MATLAB scripts for a simple Neural Network implementation 2 | -------------------------------------------------------------------------------- /tensorflow/README.txt: -------------------------------------------------------------------------------- 1 | Description of the folder contents 2 | 3 | XOR.py 4 | Implementation of a 2-layers neural network to solve the XOR problem. 
5 | It includes: definition of the dataset, definition and training of the model, plot separation surfaces. 6 | 7 | mnist_mlp_essential.py 8 | Minimal implementation of a 2-layers neural network to solve the MNIST problem. 9 | It only includes: download and loading of the dataset, definition and training of the model, plot the gradient. 10 | 11 | mnist_mlp.py 12 | Implementation of a 2-layers neural network to solve the MNIST problem. 13 | It includes: download and loading of the dataset, definition and mini-batch training of the model, regularization criterion and validation check, tools functions, plot the gradient. 14 | 15 | mnist_cnn.py 16 | Implementation of a convolutional neural network to solve the MNIST problem. 17 | It includes: download and loading of the dataset, definition and training of the model, plot the filters. 18 | -------------------------------------------------------------------------------- /tensorflow/XOR.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @author: Dario Zanca, Vincenzo Laveglia 3 | @date: 03-Nov-2016 4 | @summary: 2-layers neural network to solve XOR 5 | ''' 6 | 7 | import numpy as np 8 | import tensorflow as tf 9 | import matplotlib.pyplot as plt 10 | 11 | "Dataset definition" 12 | 13 | INPUT = np.array([[0,0],[0,1],[1,0],[1,1]]) 14 | TARGET = np.array([[0],[1],[1],[0]]) 15 | 16 | HU = 3 # number of hidden units 17 | Epochs = 10001 18 | 19 | "Define symbolic variables" 20 | 21 | x_ = tf.placeholder(tf.float32, shape=[None,2]) # for the input 22 | y_ = tf.placeholder(tf.float32, shape=[None,1]) # for the target 23 | 24 | "Definition of the Model" 25 | 26 | # First layer 27 | W1 = tf.Variable(tf.random_uniform([2,HU], -1.0, 1.0)) 28 | b1 = tf.Variable(tf.zeros([HU])) 29 | 30 | O = tf.sigmoid(tf.matmul(x_, W1) + b1) 31 | 32 | # Second layer 33 | W2 = tf.Variable(tf.random_uniform([HU,1], -1.0, 1.0)) 34 | b2 = tf.Variable(tf.zeros([1])) 35 | 36 | y = tf.sigmoid(tf.matmul(O, W2) + b2) 37 | 38 | "Definition of the cost function and optimizer" 39 | 40 | cost = tf.reduce_mean(tf.square(y_ - y)) 41 | 42 | train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cost) 43 | 44 | "Start Session" 45 | 46 | init = tf.global_variables_initializer() 47 | sess = tf.Session() 48 | sess.run(init) 49 | 50 | "Training" 51 | for i in range(Epochs): 52 | sess.run(train_step, feed_dict={x_: INPUT, y_: TARGET}) 53 | if i % 1000 == 0: 54 | print('Epoch:', i, ' -- Cost:', sess.run(cost, feed_dict={x_: INPUT, y_: TARGET})) 55 | #print('Output ', sess.run(y, feed_dict={x_: INPUT, y_: TARGET})) 56 | #print('W1 ', sess.run(W1)) 57 | #print('b1 ', sess.run(b1)) 58 | #print('W2 ', sess.run(W2)) 59 | #print('b2 ', sess.run(b2)) 60 | 61 | 62 | 63 | "Test the trained model" 64 | 65 | correct_prediction = abs(y_ - y) < 0.5 66 | cast = tf.cast(correct_prediction, "float") 67 | accuracy = tf.reduce_mean(cast) 68 | 69 | 70 | yy, cc, aa = sess.run([y, cast, accuracy],feed_dict={x_: INPUT, y_: TARGET}) 71 | print "\n\n\n Final Accuracy: ", aa 72 | 73 | "Draw separation surfaces" 74 | plt.figure() 75 | # Plotting dataset 76 | c1 = plt.scatter([1,0], [0,1], marker='s', color='gray', s=100) 77 | c0 = plt.scatter([1,0], [1,0], marker='^', color='gray', s=100) 78 | # Generating points in [-1,2]x[-1,2] 79 | DATA_x = (np.random.rand(10**6,2)*3)-1 80 | DATA_y = sess.run(y,feed_dict={x_: DATA_x}) 81 | # Selecting borderline predictions 82 | ind = np.where(np.logical_and(0.49 < DATA_y, DATA_y< 0.51))[0] 83 | DATA_ind = DATA_x[ind] 84 | # Plotting 
separation surfaces 85 | ss = plt.scatter(DATA_ind[:,0], DATA_ind[:,1], marker='_', color='black', s=5) 86 | # Some figure's settings 87 | plt.legend((c1, c0, ss), ('Class 1', 'Class 0', 'Separation surfaces'), scatterpoints=1) 88 | plt.xlabel('Input x1') 89 | plt.ylabel('Input x2') 90 | plt.axis([-1,2,-1,2]) 91 | plt.show() 92 | 93 | -------------------------------------------------------------------------------- /tensorflow/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @author: Dario Zanca, Vincenzo Laveglia 3 | @date: 03-Nov-2016 4 | @summary: CNN to solve MNIST 5 | ''' 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | from tensorflow.examples.tutorials.mnist import input_data 12 | 13 | 14 | mnist = input_data.read_data_sets('MNIST_data', one_hot=True) 15 | 16 | x = tf.placeholder(tf.float32, shape=[None, 784]) 17 | y_ = tf.placeholder(tf.float32, shape=[None, 10]) 18 | 19 | #____________tools_______________________ 20 | 21 | def weight_variable(shape): 22 | initial = tf.truncated_normal(shape, stddev=0.1) 23 | return tf.Variable(initial) 24 | 25 | def bias_variable(shape): 26 | initial = tf.constant(0.1, shape=shape) 27 | return tf.Variable(initial) 28 | 29 | # strides: 1, padding: 0 30 | def conv2d(x, W): 31 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')#strides shape??? 32 | 33 | # max pooling over 2x2 blocks 34 | def max_pool_3x3(x): 35 | return tf.nn.max_pool(x, ksize=[1, 3, 3, 1], # ksize is the size of the sliding window (for each dimension) 36 | strides=[1, 3, 3, 1], padding='SAME')#strides shapes indicate how the sliding window moves 37 | 38 | #_____________ model_____________________ 39 | 40 | #convolution layer1 - compute 32 features for each 5x5 patch 41 | INPUT_C1 = 1 42 | OUTPUT_C1 = 12 43 | W_conv1 = weight_variable([5, 5, INPUT_C1, OUTPUT_C1]) #shape:[patch_size_x, patch_size_y, input_channels, output_channels] 44 | b_conv1 = bias_variable([OUTPUT_C1]) 45 | 46 | # reshape the tensor x (dataset of images (None x 728)) to a 4D tensor 47 | #2nd and 3rd arguments are the image width and height 48 | #4th argument corresponds to the number of color channels 49 | #1st arguments: the dimensione has to be computed, 50 | # in our case is the size of the batch 51 | x_image = tf.reshape(x, [-1,28,28,1]) 52 | 53 | #convolution step 54 | h_conv1 = tf.nn.relu( conv2d(x_image, W_conv1) + b_conv1 ) 55 | 56 | #max pooling step 57 | h_pool1 = max_pool_3x3(h_conv1) 58 | 59 | #convolution layer2 - 60 | INPUT_C2 = OUTPUT_C1 61 | OUTPUT_C2 = 16 62 | W_conv2 = weight_variable([5, 5, INPUT_C2, OUTPUT_C2]) 63 | b_conv2 = bias_variable([OUTPUT_C2]) 64 | 65 | #convolution step 66 | h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) 67 | 68 | #max pooling step 69 | h_pool2 = max_pool_3x3(h_conv2) 70 | 71 | ''' 72 | We have a 7x7 image. Reshape it and fed in a fully connected network. 73 | ''' 74 | #create the layer and baias 75 | FS = 4 # final size, it is possible to compute it! 
(I left you as exercize 76 | W_fc1 = weight_variable([ FS * FS * OUTPUT_C2, 1024]) 77 | b_fc1 = bias_variable([1024]) 78 | 79 | #reshape images 80 | h_pool2_flat = tf.reshape(h_pool2, [-1, FS * FS * OUTPUT_C2]) 81 | 82 | #forward step 83 | h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) 84 | 85 | #output layer 86 | W_fc2 = weight_variable([1024, 10]) 87 | b_fc2 = bias_variable([10]) 88 | 89 | #forward step 90 | y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2 91 | 92 | ''' 93 | Here follows the code to train and evaluate the model 94 | ''' 95 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv, y_)) 96 | 97 | train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) 98 | 99 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1)) 100 | 101 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 102 | 103 | #sess = tf.Session() 104 | sess = tf.InteractiveSession() 105 | sess.run(tf.global_variables_initializer()) 106 | 107 | # Early stopping setup, to check on validation set 108 | prec_err = 10**6 # just a very big vaLue 109 | val_count = 0 110 | val_max_steps = 6 111 | 112 | # Training specs 113 | epochs = 100 114 | BATCH_SIZE = 1000 115 | num_of_batches = 60000/BATCH_SIZE 116 | 117 | i=1 118 | while i <= epochs and val_count < val_max_steps: 119 | 120 | print 'Epoch:', i, '(Early stopping criterion: ', val_count, '/', val_max_steps, ')' 121 | 122 | for j in range(num_of_batches): 123 | # training step 124 | batch = mnist.train.next_batch(BATCH_SIZE) 125 | sess.run(train_step, feed_dict={x: batch[0], y_: batch[1]}) 126 | 127 | # visualize accuracy each 10 epochs 128 | if i == 1 or i%10 == 0: 129 | train_accuracy = accuracy.eval(feed_dict={x: mnist.train.images, y_: mnist.train.labels}) 130 | test_accuracy = accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}) 131 | print("\nAccuracy at epoch %d: train accuracy %g, test accuracy %g\n"%(i, train_accuracy, test_accuracy)) 132 | 133 | # validation check 134 | curr_err = sess.run(cross_entropy, feed_dict={x: mnist.validation.images, y_: mnist.validation.labels}) 135 | if curr_err >= prec_err*0.9999: 136 | val_count = val_count + 1 137 | else: 138 | val_count = 0 139 | prec_err = curr_err 140 | 141 | i+=1 142 | 143 | 144 | print("\n\nResult:\nTest accuracy %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels})) 145 | 146 | 147 | "Visualize filters" 148 | 149 | FILTERS = W_conv1.eval() 150 | 151 | fig = plt.figure() 152 | 153 | for i in range(np.shape(FILTERS)[3]): 154 | ax = fig.add_subplot(2, 6, i+1) 155 | ax.matshow(FILTERS[:,:,0,i], cmap='gray') 156 | plt.show() 157 | -------------------------------------------------------------------------------- /tensorflow/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @author: Dario Zanca, Vincenzo Laveglia 3 | @date: 03-Nov-2016 4 | @summary: 2-layers neural network to solve MNIST 5 | ''' 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | ########################################################## 12 | ''' tools definition''' 13 | 14 | def init_weights(shape): 15 | return tf.Variable(tf.random_uniform(shape, -0.1, 0.1)) 16 | 17 | def mlp_output(X, W_h, W_o, b_h, b_o): 18 | ak = tf.matmul(X, W_h) + b_h 19 | O = tf.nn.relu(ak) #output layer 1 20 | 21 | a2 = tf.matmul(O, W_o) + b_o 22 | o2 = tf.nn.softmax(a2) #output layer2 23 | return o2 24 | 25 | 
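# Note on mlp_output above: its second layer already applies tf.nn.softmax, while the
# cross-entropy defined further down in this file (tf.nn.softmax_cross_entropy_with_logits)
# applies a softmax internally as well, so the softmax is effectively applied twice. The
# network still trains, but the more common pattern is to return the raw logits a2 and let
# the loss function apply the softmax.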
######################################################### 26 | ''' data preparation ''' 27 | 28 | from tensorflow.examples.tutorials.mnist import input_data 29 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 30 | 31 | x_dim = 784 32 | y_dim = 10 33 | 34 | x = tf.placeholder(tf.float32, [None, x_dim]) 35 | 36 | #target output 37 | y_ = tf.placeholder(tf.float32, [None, y_dim]) 38 | 39 | ######################################################## 40 | ''' model creation ''' 41 | 42 | h_layer_dim = 10 43 | epochs = 1000 44 | LEARNING_RATE = 10**-4 45 | 46 | W1 = init_weights([x_dim, h_layer_dim]) 47 | b1 = init_weights([h_layer_dim]) 48 | 49 | W2 = init_weights([h_layer_dim, y_dim]) 50 | b2 = init_weights([y_dim]) 51 | 52 | #predicted output 53 | y = mlp_output(x, W1, W2, b1, b2) 54 | 55 | # Loss 56 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_)) 57 | 58 | # regularization term 59 | regularization = tf.reduce_sum(tf.square(W1), [0, 1]) + tf.reduce_sum(tf.square(W2), [0, 1]) 60 | 61 | train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(cross_entropy + 10**-4 * regularization) 62 | 63 | ######################################################## 64 | ''' model running and evaluation''' 65 | 66 | sess = tf.Session() 67 | sess.run(tf.global_variables_initializer()) 68 | 69 | errors_train=[] 70 | errors_test=[] 71 | errors_val=[] 72 | 73 | # Early stopping setup, to check on validation set 74 | prec_err = 10**6 # just a very big vaLue 75 | val_count = 0 76 | val_max_steps = 6 77 | 78 | BATCH_SIZE = np.shape(mnist.train.images)[0] 79 | MINI_BATCH_SIZE = 1000 80 | 81 | i = 1 82 | while i <= epochs and val_count < val_max_steps: 83 | 84 | for j in range(BATCH_SIZE/MINI_BATCH_SIZE): 85 | batch_xs, batch_ys = mnist.train.next_batch(MINI_BATCH_SIZE) 86 | sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) 87 | 88 | curr_err = sess.run(cross_entropy, feed_dict={x: mnist.validation.images, y_: mnist.validation.labels}) 89 | if curr_err >= prec_err*0.9999: 90 | val_count = val_count + 1 91 | else: 92 | val_count = 0 93 | prec_err = curr_err 94 | 95 | if i % 1 == 0: 96 | errors_val.append(curr_err) 97 | c_test = sess.run(cross_entropy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}) 98 | errors_test.append(c_test) 99 | c_train = sess.run(cross_entropy, feed_dict={x: mnist.train.images, y_: mnist.train.labels}) 100 | errors_train.append(c_train) 101 | print "\n\nEPOCH: ",i, "/", epochs,"\n TRAIN ERR: ", c_train, "\n VALIDATION ERR: ", curr_err, "\n TEST ERR: ", c_test, 102 | print "\n(Early stopping criterion: ", val_count, "/", val_max_steps, ")" 103 | i = i+1 104 | 105 | correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) 106 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 107 | 108 | aa = sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}) 109 | print "Accuracy: ", aa 110 | 111 | 112 | "Plot errors" 113 | E = range(np.shape(errors_train)[0]) 114 | E = np.asanyarray(E)*1 115 | line_train, = plt.plot(E, errors_train) 116 | line_test, = plt.plot(E, errors_test) 117 | line_val, = plt.plot(E, errors_val) 118 | plt.legend([line_train, line_val, line_test], ['Training', 'Validation', 'Test']) 119 | plt.ylabel('Cross-Entropy') 120 | plt.xlabel('Epochs') 121 | plt.show() 122 | 123 | -------------------------------------------------------------------------------- /tensorflow/mnist_mlp_essential.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 
@author: Dario Zanca 3 | @summary: 2-layers neural network to solve MNIST 4 | ''' 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | 9 | ######################################################## 10 | ''' data preparation ''' 11 | 12 | from tensorflow.examples.tutorials.mnist import input_data 13 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 14 | 15 | x_dim = 784 16 | y_dim = 10 17 | 18 | x_ = tf.placeholder(tf.float32, [None, x_dim]) 19 | y_ = tf.placeholder(tf.float32, [None, y_dim]) 20 | 21 | ######################################################## 22 | ''' model definition ''' 23 | 24 | h_layer_dim = 10 25 | epochs = 100 26 | 27 | W1 = tf.Variable(tf.random_uniform([x_dim, h_layer_dim],-0.1, 0.1)) 28 | b1 = tf.Variable(tf.random_uniform([h_layer_dim],-0.1, 0.1)) 29 | 30 | h1 = tf.nn.relu(tf.matmul(x_,W1)+b1) # hidden layer 31 | 32 | W2 = tf.Variable(tf.random_uniform([h_layer_dim, y_dim],-0.1, 0.1)) 33 | b2 = tf.Variable(tf.random_uniform([y_dim],-0.1, 0.1)) 34 | 35 | y = tf.nn.relu(tf.matmul(h1,W2)+b2) # prediction 36 | 37 | MSE = tf.losses.mean_squared_error(y_, y) # Mean squared error 38 | 39 | train_step = tf.train.AdamOptimizer(0.01).minimize(MSE) 40 | 41 | correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) 42 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 43 | 44 | ######################################################## 45 | ''' model running and evaluation''' 46 | 47 | sess = tf.Session() 48 | sess.run(tf.global_variables_initializer()) 49 | 50 | for i in range(epochs): 51 | sess.run(train_step, feed_dict={x_: mnist.train.images, y_: mnist.train.labels}) 52 | print "\nEpoch: ", i+1 ,"/", epochs, " -- MSE =", sess.run(MSE, feed_dict={x_: mnist.train.images, y_: mnist.train.labels}) 53 | 54 | # final result 55 | print "\nAccuracy on test: ", sess.run(accuracy, feed_dict={x_: mnist.test.images, y_: mnist.test.labels}) 56 | -------------------------------------------------------------------------------- /torch/README: -------------------------------------------------------------------------------- 1 | This folder collects (not yet) some useful examples to learn torch and lua for machine learning applications. 
2 | -------------------------------------------------------------------------------- /torch/pnn/datasetCreation.lua: -------------------------------------------------------------------------------- 1 | function kernel(x_1,x_0,h) 2 | if ((torch.abs(x_1-x_0))/h <= 0.5 ) then 3 | return 1; 4 | else 5 | return 0; 6 | end 7 | end 8 | 9 | function parzenPDF(input, h_0) 10 | -- this function returns the target constructed using the parzen window pdf estimation on a given input 11 | assert(input, "no data has been provided ") 12 | h_0 = h_0 or 16 13 | 14 | n = input:size(1) 15 | ker = torch.zeros(n):typeAs(input); 16 | target = torch.zeros(n):typeAs(input); 17 | h = h_0/torch.sqrt(n); 18 | for i = 1,n do 19 | for j = 1,n do 20 | if i ~= j then 21 | ker[i] = ker[i] + kernel(input[i],input[j],h) 22 | end 23 | end 24 | target[i] = ker[i]/(n*h) 25 | end 26 | print(target) 27 | return target 28 | end 29 | 30 | function truePDF(x) 31 | -- normal distribution 32 | local x = -torch.pow(x,2)/2 33 | local expx = x:exp() 34 | return expx*(1/torch.sqrt(math.pi*2)) 35 | end 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /torch/pnn/pnn.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | require 'nn' 3 | require 'gnuplot' 4 | 5 | require 'datasetCreation.lua' 6 | require 'training.lua' 7 | 8 | local options = {} 9 | options.nTrainExamples = 2000 10 | options.nTestExamples = 1000 11 | options.h_0 = 16 12 | options.nepochs = 10 13 | options.eta = 0.01 14 | options.cuda = false -- ENABLE CUDA only if you have it and don't forget to install cutorch and cunn packagess 15 | 16 | 17 | local x = torch.randn(options.nTrainExamples) -- create a tensor of values taken from a normal distribution with mean=0 and std=1 18 | local xtest = torch.randn(options.nTestExamples,1) 19 | 20 | local truePdf = truePDF(x) 21 | gnuplot.plot(x,truePdf, '+') 22 | gnuplot.title("True PDF") 23 | gnuplot.figure() 24 | 25 | local target = parzenPDF(x, options.h_0) 26 | gnuplot.plot(x,target,'+') 27 | gnuplot.title("PDF estimated with Parzen Window") 28 | gnuplot.figure() 29 | 30 | 31 | ---- PNN creation ---- 32 | local mlp = nn.Sequential() 33 | inputs = 1 outputs = 1 HUs = 10 -- parameters 34 | mlp:add(nn.Linear(inputs, HUs)) 35 | mlp:add(nn.Tanh()) 36 | mlp:add(nn.Linear(HUs, outputs)) 37 | mlp:add(nn.ReLU()) 38 | local criterion = nn.MSECriterion() 39 | 40 | if options.cuda then 41 | require 'cutorch' 42 | require 'cunn' 43 | mlp:cuda() 44 | criterion:cuda() 45 | target = target:cuda() 46 | x = x:cuda() 47 | xtest = xtest:cuda() 48 | batchTraining(mlp, criterion, x, target, options.nepochs, options.eta) 49 | else 50 | onlineTraining(mlp, criterion, x, target, options.nepochs, options.eta) 51 | end 52 | 53 | --PNN training 54 | --onlineTraining(mlp, criterion, x, target, options.nepochs, options.eta) 55 | 56 | 57 | --PNN test 58 | 59 | 60 | testOutput = mlp:forward(xtest) 61 | 62 | gnuplot.plot(xtest:view(-1):double(),testOutput:view(-1):double(),'+') 63 | gnuplot.title("PDF estimated with the PNN") 64 | gnuplot.figure() 65 | -------------------------------------------------------------------------------- /torch/pnn/training.lua: -------------------------------------------------------------------------------- 1 | function onlineTraining(mlp, criterion, y, target, nepochs, eta) 2 | local nepochs = nepochs or 10 -- how to assign default values to function parameters 3 | local eta = eta or 0.01 -- how to assign default values to 
function parameters 4 | 5 | local input = torch.Tensor(1); 6 | local output = torch.Tensor(1); 7 | 8 | for e = 1,nepochs do 9 | for k = 1,n do 10 | input[1] = y[k] 11 | output[1] = target[k] 12 | criterion:forward(mlp:forward(input), output) 13 | mlp:zeroGradParameters() 14 | -- (2) accumulate gradients 15 | mlp:backward(input, criterion:backward(mlp.output, output)) 16 | -- (3) update parameters with a 0.01 learning rate 17 | mlp:updateParameters(eta) 18 | end 19 | end 20 | end 21 | 22 | function batchTraining(mlp, criterion, input, target, nepochs, eta) 23 | local nepochs = nepochs or 10 -- how to assign default values to function parameters 24 | local eta = eta or 0.01 -- how to assign default values to function parameters 25 | 26 | local input = input:view(-1,1) 27 | local target = target:view(-1,1) 28 | print(input:size()) 29 | print(target:size()) 30 | for e = 1,nepochs do 31 | for k = 1,n do 32 | criterion:forward(mlp:forward(input), target) 33 | mlp:zeroGradParameters() 34 | -- (2) accumulate gradients 35 | mlp:backward(input, criterion:backward(mlp.output, target)) 36 | -- (3) update parameters with a 0.01 learning rate 37 | mlp:updateParameters(eta) 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /torch/xor/README.md: -------------------------------------------------------------------------------- 1 |

# XOR classifier

2 | 3 | To launch the code download the xor folder and run the file xor.lua . 4 | 5 | Inside that file, you can edit several parameters: 6 | 11 | 12 | As default options it is created a network with 2 hidden layers, 13 | the weight decay is off, and at runtime 3 plots are saved in the current directory. 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /torch/xor/data/data.lua: -------------------------------------------------------------------------------- 1 | 2 | function trueXorDataset() 3 | -- generate the classic 4 samples where True is 0.5, False -0.5 4 | local dataset = torch.Tensor(4,2):fill(-1) 5 | local target = torch.Tensor(4,1):fill(-1) 6 | dataset[2][1] = 1 -- ex T F 7 | dataset[3][2] = 1 -- ex F T 8 | dataset[4][1] = 1; dataset[4][2] = 1 -- ex T T 9 | dataset:mul(0.5) 10 | 11 | target[2][1] = 1; target[3][1] = 1 12 | target:mul(0.5) 13 | return dataset, target 14 | end 15 | 16 | function fuzzyXorDataset(n) 17 | -- generate n 2D examples in [-0.5,0.5], supervising as positive the ones with the elements having 18 | -- the same sign and as negative otherwise 19 | local n = n or 1000 20 | local dataset = torch.rand(n,2):add(-0.5) 21 | local target = torch.rand(n):fill(0) 22 | 23 | for i=1,dataset:size(1) do 24 | local pattern = dataset[i] 25 | if pattern[1]*pattern[2] > 0 then 26 | target[i] = 0.5 27 | else 28 | target[i] = -0.5 29 | end 30 | end 31 | return dataset, target 32 | end -------------------------------------------------------------------------------- /torch/xor/data/includes.lua: -------------------------------------------------------------------------------- 1 | include('data.lua') 2 | -------------------------------------------------------------------------------- /torch/xor/plots/includes.lua: -------------------------------------------------------------------------------- 1 | include('./plots.lua') 2 | -------------------------------------------------------------------------------- /torch/xor/plots/plots.lua: -------------------------------------------------------------------------------- 1 | function plotSepSurface( mlp, n ) 2 | -- estimate the separation surface 3 | local eps = 2e-3 4 | local n = n or 1000000 5 | local x = torch.rand(n,2):add(-0.5) -- huge number of 2D samples in [-0.5,0.5] 6 | local mlpOutput = mlp:forward(x) -- predict the value of each sample 7 | local mask = torch.le(torch.abs(mlpOutput),mlpOutput:clone():fill(eps)) -- a mask of x to plot only samples predicted less or equal than eps 8 | 9 | if torch.sum(mask) > 0 then 10 | gnuplot.epsfigure('Separation surface.eps') 11 | local x1 = x:narrow(2,1,1) 12 | local x2 = x:narrow(2,2,1) 13 | gnuplot.plot(x1[mask], x2[mask], '+') 14 | gnuplot.title('Separation surface') 15 | gnuplot.grid(true) 16 | gnuplot.plotflush() 17 | gnuplot.figure() 18 | end 19 | end 20 | 21 | function plotPredictions(mlp, n) 22 | -- given n samples randomly picked, plots positive and negative 23 | -- predictions with different colors 24 | local n = n or 3000 25 | local x = torch.rand(n,2):add(-0.5) 26 | local mlpOutput = mlp:forward(x) 27 | local truePredMask = torch.gt(mlpOutput, mlpOutput:clone():fill(0)) 28 | local falsePredMask = truePredMask:clone():add(-1):mul(-1) 29 | local x1 = x:narrow(2,1,1) 30 | local x2 = x:narrow(2,2,1) 31 | 32 | print(falsePredMask) 33 | print(mlpOutput) 34 | if torch.sum(truePredMask) > 0 and torch.sum(falsePredMask) > 0 then 35 | local truePlot = {'predicted as true', x1[truePredMask], x2[truePredMask],'+'} 36 | local falsePlot = {'predicted 
as false', x1[falsePredMask], x2[falsePredMask], '+'} 37 | gnuplot.epsfigure('XOR.eps') 38 | gnuplot.plot(truePlot,falsePlot) 39 | gnuplot.title('Xor') 40 | gnuplot.grid(true) 41 | gnuplot.plotflush() 42 | gnuplot.figure() 43 | end 44 | end 45 | 46 | function plotLoss( loss, weightDecay ) 47 | -- compute the loss function and the weight decay, over the number of epochs 48 | local nepochs = loss:size(1) > weightDecay:size(1) and loss:size(1) or weightDecay:size(1) 49 | gnuplot.epsfigure('XORloss.eps') 50 | gnuplot.plot({'loss function', torch.range(1,nepochs),loss},{'weight decay', torch.range(1,nepochs), weightDecay}) 51 | gnuplot.title('Loss') 52 | gnuplot.grid(true) 53 | gnuplot.plotflush() 54 | gnuplot.figure() 55 | end -------------------------------------------------------------------------------- /torch/xor/regularization/WeightDecayWrapper.lua: -------------------------------------------------------------------------------- 1 | local WeightDecay, parent = torch.class('nn.WeightDecayWrapper', 'nn.Sequential') 2 | 3 | function WeightDecay:__init() 4 | parent.__init(self) 5 | self.weightDecay = 0 6 | self.currentOutput = 0 7 | end 8 | 9 | function WeightDecay:getWeightDecay(alpha) 10 | local alpha = alpha or 0 11 | local weightDecay = 0 12 | for i=1,#self.modules do 13 | local params,_ = self.modules[i]:parameters() 14 | if params then 15 | for j=1,#params do 16 | weightDecay = weightDecay + torch.dot(params[j], params[j])*alpha/2 17 | end 18 | end 19 | end 20 | self.weightDecay = weightDecay 21 | return self.weightDecay 22 | end 23 | 24 | function WeightDecay:updateParameters(learningRate,alpha) 25 | local alpha = alpha or 0 26 | for i=1,#self.modules do 27 | local params, gradParams = self.modules[i]:parameters() 28 | if params then 29 | for j=1,#params do 30 | params[j]:add(-learningRate, gradParams[j] + (alpha*params[j])) 31 | end 32 | end 33 | end 34 | end -------------------------------------------------------------------------------- /torch/xor/regularization/includes.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | include('./WeightDecayWrapper.lua') 3 | -------------------------------------------------------------------------------- /torch/xor/xor.lua: -------------------------------------------------------------------------------- 1 | require "nn" 2 | require "gnuplot" 3 | require './regularization/includes.lua' 4 | require './data/includes.lua' 5 | require './plots/includes.lua' -- include some routines to plot some results 6 | ---- Neural Network Creation ---- 7 | --mlp = nn.Sequential(); -- make a multi-layer perceptron 8 | mlp = nn.WeightDecayWrapper() 9 | inputs = 2; outputs = 1; HUs = 2; -- parameters 10 | mlp:add(nn.Linear(inputs, HUs)) 11 | mlp:add(nn.Tanh()) 12 | mlp:add(nn.Linear(HUs, outputs)) 13 | 14 | ---- Loss Function ---- 15 | criterion = nn.MSECriterion() 16 | 17 | local dataset, target = trueXorDataset() 18 | --local dataset, target = fuzzyXorDataset() 19 | 20 | print('Dataset');print(dataset) 21 | print('Target');print( target) 22 | 23 | ---- Training the Network ---- 24 | nepochs = 1000; learning_rate = 0.05; 25 | alpha = 0 -- weight decay coefficent (0 means no weight decay contribute) 26 | local loss = torch.Tensor(nepochs):fill(0); local weightDecay = torch.Tensor(nepochs):fill(0) 27 | for i = 1,nepochs do 28 | local input = dataset 29 | loss[i] = criterion:forward(mlp:forward(input), target) -- feed the net and the criterion 30 | weightDecay[i] = mlp:getWeightDecay(alpha) 31 | 32 | 
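-- The next three lines spell out one step of gradient descent: clear the gradient
-- buffers, backpropagate the criterion's gradient through the network, then update the
-- parameters. Since mlp is an nn.WeightDecayWrapper, updateParameters also adds the
-- weight-decay term alpha*w to each gradient before the update (with alpha = 0 this
-- reduces to plain gradient descent).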
mlp:zeroGradParameters() -- zero the accumulation of the gradients 33 | mlp:backward(input, criterion:backward(mlp.output, target)) -- accumulate gradients 34 | mlp:updateParameters(learning_rate, alpha) -- update parameters with a learining rate learning_rate and weight decay coefficent of alpha 35 | end 36 | 37 | ---- Test the Network ---- 38 | plotSepSurface(mlp) 39 | plotLoss(loss, weightDecay) 40 | plotPredictions(mlp) 41 | --------------------------------------------------------------------------------
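As a quick numeric check to complement the plots produced by xor.lua, the following minimal sketch (an assumption on our part: it is meant to be appended at the end of xor.lua, so that `mlp` and `trueXorDataset` are already in scope) prints the raw network output and the decoded class for the four canonical XOR patterns; since the targets are ±0.5, zero is used as the decision threshold:

```lua
-- Hypothetical snippet, appended after the training loop and the plot calls in xor.lua:
-- print the raw output and the decoded class for the four XOR patterns.
local testInput, testTarget = trueXorDataset()  -- the 4 canonical samples with +/-0.5 targets
local testOutput = mlp:forward(testInput)       -- forward pass on all samples at once
for i = 1, testInput:size(1) do
  local predictedClass = (testOutput[i][1] > 0) and 1 or 0  -- threshold at 0
  local targetClass    = (testTarget[i][1] > 0) and 1 or 0
  print(string.format('input: (%5.2f, %5.2f)  output: %7.4f  predicted: %d  target: %d',
        testInput[i][1], testInput[i][2], testOutput[i][1], predictedClass, targetClass))
end
```

If training has converged, all four patterns should fall on the correct side of the threshold; the same check also works after switching the script to `fuzzyXorDataset()`.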