* DL4J's interpretation of the AlexNet model, based on the original paper "ImageNet Classification with Deep Convolutional Neural Networks"
* and the referenced imagenetExample code.
*
* References:
* http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
* https://github.com/BVLC/caffe/blob/master/models/bvlc_alexnet/train_val.prototxt
*
* The model is built in DL4J from the functionality currently available; notes indicate where there are gaps awaiting enhancements.
*
43 | * Bias initialization in the paper is 1 in certain layers but 0.1 in the imagenetExample code 44 | * Weight distribution uses 0.1 std for all layers in the paper but 0.005 in the dense layers in the imagenetExample code 45 | */ 46 | public class AlexNet { 47 | 48 | private int height; 49 | private int width; 50 | private int channels; 51 | private int numLabels = 1000; 52 | private long seed = 42; 53 | private int iterations = 90; 54 | 55 | public AlexNet(int height, int width, int channels, int numLabels, long seed, int iterations) { 56 | this.height = height; 57 | this.width = width; 58 | this.channels = channels; 59 | this.numLabels = numLabels; 60 | this.seed = seed; 61 | this.iterations = iterations; 62 | } 63 | 64 | public MultiLayerConfiguration conf() { 65 | double nonZeroBias = 1; 66 | double dropOut = 0.5; 67 | SubsamplingLayer.PoolingType poolingType = SubsamplingLayer.PoolingType.MAX; 68 | 69 | // TODO split and link kernel maps on GPUs - 2nd, 4th, 5th convolution should only connect maps on the same gpu, 3rd connects to all in 2nd 70 | MultiLayerConfiguration.Builder conf = new NeuralNetConfiguration.Builder() 71 | .seed(seed) 72 | .weightInit(WeightInit.DISTRIBUTION) 73 | .dist(new NormalDistribution(0.0, 0.01)) 74 | .activation("relu") 75 | .updater(Updater.NESTEROVS) 76 | .iterations(iterations) 77 | .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) // normalize to prevent vanishing or exploding gradients 78 | .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) 79 | .learningRate(1e-2) 80 | .biasLearningRate(1e-2 * 2) 81 | .learningRateDecayPolicy(LearningRatePolicy.Step) 82 | .lrPolicyDecayRate(0.1) 83 | .lrPolicySteps(100000) 84 | .regularization(true) 85 | .l2(5 * 1e-4) 86 | .momentum(0.9) 87 | .miniBatch(false) 88 | .list() 89 | .layer(0, new ConvolutionLayer.Builder(new int[]{11, 11}, new int[]{4, 4}, new int[]{3, 3}) 90 | .name("cnn1") 91 | .nIn(channels) 92 | .nOut(96) 93 | .build()) 94 | .layer(1, new 
LocalResponseNormalization.Builder() 95 | .name("lrn1") 96 | .build()) 97 | .layer(2, new SubsamplingLayer.Builder(poolingType, new int[]{3, 3}, new int[]{2, 2}) 98 | .name("maxpool1") 99 | .build()) 100 | .layer(3, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{1, 1}, new int[]{2, 2}) 101 | .name("cnn2") 102 | .nOut(256) 103 | .biasInit(nonZeroBias) 104 | .build()) 105 | .layer(4, new LocalResponseNormalization.Builder() 106 | .name("lrn2") 107 | .k(2).n(5).alpha(1e-4).beta(0.75) 108 | .build()) 109 | .layer(5, new SubsamplingLayer.Builder(poolingType, new int[]{3, 3}, new int[]{2, 2}) 110 | .name("maxpool2") 111 | .build()) 112 | .layer(6, new ConvolutionLayer.Builder(new int[]{3, 3}, new int[]{1, 1}, new int[]{1, 1}) 113 | .name("cnn3") 114 | .nOut(384) 115 | .build()) 116 | .layer(7, new ConvolutionLayer.Builder(new int[]{3, 3}, new int[]{1, 1}, new int[]{1, 1}) 117 | .name("cnn4") 118 | .nOut(384) 119 | .biasInit(nonZeroBias) 120 | .build()) 121 | .layer(8, new ConvolutionLayer.Builder(new int[]{3, 3}, new int[]{1, 1}, new int[]{1, 1}) 122 | .name("cnn5") 123 | .nOut(256) 124 | .biasInit(nonZeroBias) 125 | .build()) 126 | .layer(9, new SubsamplingLayer.Builder(poolingType, new int[]{3, 3}, new int[]{2, 2}) 127 | .name("maxpool3") 128 | .build()) 129 | .layer(10, new DenseLayer.Builder() 130 | .name("ffn1") 131 | .nOut(4096) 132 | .dist(new GaussianDistribution(0, 0.005)) 133 | .biasInit(nonZeroBias) 134 | .dropOut(dropOut) 135 | .build()) 136 | .layer(11, new DenseLayer.Builder() 137 | .name("ffn2") 138 | .nOut(4096) 139 | .dist(new GaussianDistribution(0, 0.005)) 140 | .biasInit(nonZeroBias) 141 | .dropOut(dropOut) 142 | .build()) 143 | .layer(12, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) 144 | .name("output") 145 | .nOut(numLabels) 146 | .activation("softmax") 147 | .build()) 148 | .backprop(true) 149 | .pretrain(false) 150 | .cnnInputSize(height, width, channels); 151 | 152 | return conf.build(); 153 | } 154 | 155 | 
public MultiLayerNetwork init() { 156 | MultiLayerNetwork network = new MultiLayerNetwork(conf()); 157 | network.init(); 158 | return network; 159 | 160 | } 161 | 162 | } 163 | --------------------------------------------------------------------------------