├── nets ├── .gitignore ├── README.md ├── Makefile ├── alexnet_deploy.json ├── VGG_CNN_M_deploy.json ├── extract.py ├── faster_rcnn_vggm.json ├── VGG_ILSVRC_16_layers_deploy.json └── faster_rcnn_vgg16.json ├── config ├── vgg16-fconly.json ├── alexnet.json ├── vgg-m.json ├── alexnet-scale.json ├── faster-rcnn-vgg-m.json ├── vgg16-after-pool1.json ├── vgg16.json ├── vgg16-scale.json └── faster-rcnn-vgg16.json ├── data ├── eie-layers.csv ├── eyeriss-alexnet.csv └── eyeriss-vgg16.csv ├── raw ├── eyeriss-tableV.txt ├── eyeriss-tableIII.txt ├── eyeriss-tableIV.txt ├── eyeriss-tableVI.txt └── eie-raw.csv ├── LICENSE ├── README.md └── fodlam.py /nets/.gitignore: -------------------------------------------------------------------------------- 1 | caffe.proto 2 | caffe_pb2.py 3 | *.prototxt 4 | -------------------------------------------------------------------------------- /config/vgg16-fconly.json: -------------------------------------------------------------------------------- 1 | { 2 | "net": "VGG16", 3 | "layers": [ 4 | "FC6", 5 | "FC7", 6 | "FC8" 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /data/eie-layers.csv: -------------------------------------------------------------------------------- 1 | Layer,AlexNet FC6,AlexNet FC7,AlexNet FC8,VGG16 FC6,VGG16 FC7,VGG16 FC8,NT- We,NT- Wd,NT- LSTM 2 | Theoretical Time,28.1,11.7,8.9,28.1,7.9,7.3,5.2,13.0,6.5 3 | Actual Time,30.3,12.2,9.9,34.4,8.7,8.4,8.0,13.9,7.5 4 | -------------------------------------------------------------------------------- /config/alexnet.json: -------------------------------------------------------------------------------- 1 | { 2 | "net": "AlexNet", 3 | "layers": [ 4 | "CONV1", 5 | "CONV2", 6 | "CONV3", 7 | "CONV4", 8 | "CONV5", 9 | "FC6", 10 | "FC7", 11 | "FC8" 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /config/vgg-m.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "netfile": "VGG_CNN_M_deploy.json", 3 | "layers": [ 4 | "conv1", 5 | "conv2", 6 | "conv3", 7 | "conv4", 8 | "conv5", 9 | "fc6", 10 | "fc7", 11 | "fc8" 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /config/alexnet-scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "netfile": "alexnet_deploy.json", 3 | "layers": [ 4 | "CONV1", 5 | "CONV2", 6 | "CONV3", 7 | "CONV4", 8 | "CONV5", 9 | "FC6", 10 | "FC7", 11 | "FC8" 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /nets/README.md: -------------------------------------------------------------------------------- 1 | FODLAM Network Statistics 2 | ========================= 3 | 4 | This directory contains cost statistics extracted from network specifications from around the Web. 5 | 6 | The directory also contains the infrastructure used to fetch the data and perform the configuration, but the JSON files are included here to make FODLAM self-contained. 
7 | -------------------------------------------------------------------------------- /config/faster-rcnn-vgg-m.json: -------------------------------------------------------------------------------- 1 | { 2 | "netfile": "faster_rcnn_vggm.json", 3 | "layers": [ 4 | "conv1", 5 | "conv2", 6 | "conv3", 7 | "conv4", 8 | "conv5", 9 | "rpn_conv/3x3", 10 | "rpn_cls_score", 11 | "rpn_bbox_pred", 12 | "fc6", 13 | "fc7", 14 | "cls_score", 15 | "bbox_pred" 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /config/vgg16-after-pool1.json: -------------------------------------------------------------------------------- 1 | { 2 | "net": "VGG16", 3 | "layers": [ 4 | "CONV2-1", 5 | "CONV2-2", 6 | "CONV3-1", 7 | "CONV3-2", 8 | "CONV3-3", 9 | "CONV4-1", 10 | "CONV4-2", 11 | "CONV4-3", 12 | "CONV5-1", 13 | "CONV5-2", 14 | "CONV5-3", 15 | "FC6", 16 | "FC7", 17 | "FC8" 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /config/vgg16.json: -------------------------------------------------------------------------------- 1 | { 2 | "net": "VGG16", 3 | "layers": [ 4 | "CONV1-1", 5 | "CONV1-2", 6 | "CONV2-1", 7 | "CONV2-2", 8 | "CONV3-1", 9 | "CONV3-2", 10 | "CONV3-3", 11 | "CONV4-1", 12 | "CONV4-2", 13 | "CONV4-3", 14 | "CONV5-1", 15 | "CONV5-2", 16 | "CONV5-3", 17 | "FC6", 18 | "FC7", 19 | "FC8" 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /config/vgg16-scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "netfile": "VGG_ILSVRC_16_layers_deploy.json", 3 | "layers": [ 4 | "CONV1-1", 5 | "CONV1-2", 6 | "CONV2-1", 7 | "CONV2-2", 8 | "CONV3-1", 9 | "CONV3-2", 10 | "CONV3-3", 11 | "CONV4-1", 12 | "CONV4-2", 13 | "CONV4-3", 14 | "CONV5-1", 15 | "CONV5-2", 16 | "CONV5-3", 17 | "FC6", 18 | "FC7", 19 | "FC8" 20 | ] 21 | } 22 | 
-------------------------------------------------------------------------------- /data/eyeriss-alexnet.csv: -------------------------------------------------------------------------------- 1 | Layer,Power (mW),Total Latency (ms),Processing Latency (ms),Num. of MACs (G),Num. of Active PEs,Zeros in Ifmaps,Global Buffer Accesses (MB),DRAM Accesses (MB) 2 | CONV1,332,20.9 ,16.5, 0.42,154,0.01%, 18.5, 5.0 3 | CONV2,288,41.9 ,39.2, 0.90,135,38.7%, 77.6, 4.0 4 | CONV3,266,23.6 ,21.8, 0.60,156,72.5%, 50.2, 3.0 5 | CONV4,235,18.4 ,16.0, 0.45,156,79.3%, 37.4, 2.1 6 | CONV5,236,10.5 ,10.0, 0.30,156,77.6%, 24.9, 1.3 7 | Total,278,115.3,103.5,2.66,148,57.53%,208.5,15.4 8 | -------------------------------------------------------------------------------- /config/faster-rcnn-vgg16.json: -------------------------------------------------------------------------------- 1 | { 2 | "netfile": "faster_rcnn_vgg16.json", 3 | "layers": [ 4 | "conv1_1", 5 | "conv1_2", 6 | "conv2_1", 7 | "conv2_2", 8 | "conv3_1", 9 | "conv3_2", 10 | "conv3_3", 11 | "conv4_1", 12 | "conv4_2", 13 | "conv4_3", 14 | "conv5_1", 15 | "conv5_2", 16 | "conv5_3", 17 | "rpn_conv/3x3", 18 | "rpn_cls_score", 19 | "rpn_bbox_pred", 20 | "fc6", 21 | "fc7", 22 | "cls_score", 23 | "bbox_pred" 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /raw/eyeriss-tableV.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | Lav" iner 10ml Proceseing Num. of Num. «1' Zem in Glnhnl Bufl'. DRAM 18 | 19 | ’ (mW) Lamncy (Ills) Latency (Ills) MACS Active PEs Ifmaps (%) Accesses Awesses 20 | 21 | CONVI 332 209 165 0420 154(92%) (mm; 18.5 MB 50 MB 22 | CONVZ 288 41.9 39 2 0.906 135 (80%) 38.7% 77.6 MB 23 | CONV3 266 23.6 21.3 0.600 156 (93%) 72. 
% 5 2 MB 24 | CONV4 235 18.4 16.0 0.455 156 (93%) 79.3 70 37 4 MB 25 | CONVS 236 10.5 10.0 0.300 [56 (93%) 77.6% 24 9 MB 26 | Total 278 115.3 1035 2.666 143 (88%) 5753% 208.5 MB 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /raw/eyeriss-tableIII.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | CNN Shape Parameom 4 | 5 | RS Datafluw Mapping Paramenm 6 | 7 | Global Buffer Allocation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | I‘m' H/W‘ R/S E/F C M U m n e p q r 2 ifmup psum 54 | CONVI 227 11 55 3 96 4 96 1 7 16 1 1 2 155103 721103 55 | CONVZ 3| 5 27 48 256 I 64 l 27 16 2 l l 3,8KE 9LIKB 56 | CONva 15 3 13 256 334 1 64 4 13 16 4 1 4 70103 345105 57 | CONVA 15 3 13 192 3134 1 64 4 13 16 3 2 2 1115103 84.5KB 58 | CONVS 15 3 13 192 256 1 64 4 13 16 3 2 2 105105 345103 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /data/eyeriss-vgg16.csv: -------------------------------------------------------------------------------- 1 | Layer,Power (mW),Total Latency (ms),Processing Latency (ms),MACs (G),PEs,Zeros,Global Buffer Accesses (MB),DRAM Accesses (MB) 2 | CONV1-1,247,76.2,38.0,0.26,156,1.6%,112.6,15.4 3 | CONV1-2,218,910.3,810.6,5.55,156,47.7%,2402.8,54.0 4 | CONV2-1,242,470.3,405.3,2.77,156,24.8%,1201.4,33.4 5 | CONV2-2,231,894.3,810.8,5.55,156,38.7%,2402.8,48.5 6 | CONV3-1,254,241.1,204.0,2.77,156,39.7%,607.4,20.2 7 | CONV3-2,235,460.9,408.1,5.55,156,58.1%,1214.8,32.2 8 | CONV3-3,233,457.7,408.1,5.55,156,58.7%,1214.8,30.8 9 | CONV4-1,278,135.8,105.1,2.77,168,64.3%,321.8,17.8 10 | CONV4-2,261,254.8,210.0,5.55,168,74.7%,643.7,28.6 11 
| CONV4-3,240,246.3,210.0,5.55,168,85.4%,643.7,22.8 12 | CONV5-1,258,54.3,48.3,1.39,163,79.4%,90.0,6.3 13 | CONV5-2,236,53.7,48.5,1.39,168,87.4%,90.0,5.7 14 | CONV5-3,230,53.7,48.5,1.39,168,88.5%,90.0,5.6 15 | Total,236,4309.5,3755.2,46.04,158,58.6%,11035.8,321.1 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2017 Adrian Sampson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /nets/Makefile: -------------------------------------------------------------------------------- 1 | # Network specifications from the Web. 
2 | 3 | NETS := VGG_CNN_M_deploy VGG_ILSVRC_16_layers_deploy alexnet_deploy \ 4 | faster_rcnn_vgg16 faster_rcnn_vggm 5 | 6 | .PHONY: all clean 7 | all: $(NETS:%=%.json) 8 | clean: 9 | rm -f $(NETS:%=%.prototxt) $(NETS:%=%.json) 10 | 11 | # Prototxt from the web. 12 | 13 | VGG_CNN_M_deploy.prototxt: 14 | curl -LO "https://gist.githubusercontent.com/ksimonyan/f194575702fae63b2829/raw/6516d9be2064680697ee6791d8de77cfc0dd0990/VGG_CNN_M_deploy.prototxt" 15 | 16 | VGG_ILSVRC_16_layers_deploy.prototxt: 17 | curl -LO "https://gist.githubusercontent.com/ksimonyan/211839e770f7b538e2d8/raw/ded9363bd93ec0c770134f4e387d8aaaaa2407ce/VGG_ILSVRC_16_layers_deploy.prototxt" 18 | 19 | alexnet_deploy.prototxt: 20 | curl -L -o $@ "https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_alexnet/deploy.prototxt" 21 | 22 | faster_rcnn_vgg16.prototxt: 23 | curl -L -o $@ "https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/models/pascal_voc/VGG16/faster_rcnn_end2end/test.prototxt" 24 | 25 | faster_rcnn_vggm.prototxt: 26 | curl -L -o $@ "https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/test.prototxt" 27 | 28 | # Conversion tool. 
29 | 30 | %.json: %.prototxt 31 | python2 extract.py $< > $@ 32 | -------------------------------------------------------------------------------- /raw/eyeriss-tableIV.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Technology 4 | 5 | TSMC 65nm LP IPQM 6 | 7 | 8 | 9 | Chip Size 10 | 11 | 4.0 mm x 4.0 mm 12 | 13 | 14 | 15 | Core Area 16 | 17 | 35 mm x 35 mm 18 | 19 | 20 | 21 | Gate Count (logic only) 22 | 23 | ll76k (2-inpm NAND) 24 | 25 | 26 | 27 | On-Chip SRAM 28 | 29 | 181.5K bytes 30 | 31 | 32 | 33 | Number of PEs 34 | 35 | 168 36 | 37 | 38 | 39 | Global Buffer 40 | 41 | IOS‘OK bytes (SRAM) 42 | 43 | 44 | 45 | Scratch Pads 46 | (per PE) 47 | 48 | finer wcigms: 44s bytcs (SRAM) 49 | feature maps: 24 bytes (Registers) 50 | parlial sums: 48 byles (Regislers) 51 | 52 | 53 | 54 | Supply Voltage 55 | 56 | core: 0,8271‘17 V 57 | 1/0: 1.8 V 58 | 59 | 60 | 61 | Clock Rate 62 | 63 | core: 1007250 MHZ 64 | link: up 10 90 MHz 65 | 66 | 67 | 68 | Peak Throughput 69 | 70 | 16‘842‘0 GMACS 71 | 72 | 73 | 74 | Arithmetic Precision 75 | 76 | 16-bit fixed-poinl 77 | 78 | 79 | 80 | 81 | 82 | Namely Supported 83 | CNN Shapes 84 | 85 | 86 | 87 | filler height (H): 1712 88 | 89 | filter width (5): 32 90 | 91 | num. of films (M): 71024 92 | um, of channels (C): 1—1024 93 | venical smde: 1,2,4 94 | horizomul S&ridc: Hz 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /raw/eyeriss-tableVI.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | Layer Power Tuml Pmcexcing Num. «r Num. of Zen: in Ghlml Bun. 
DRAM 36 | ‘ (mW) Latency (ms) Latency (ms) MACx Active PEx Imam (%) Acmm Accemx 37 | CONVIVI 247 76.2 38 0 0,260 156 (93%) 16% 112.5 MB 154 MB 38 | CONVl»2 218 910.3 810.6 5,556 156 (93%) 47.7% 2402.8 MB 54 0 MB 39 | CONVZVI 242 470.3 405.3 2.776 150 193%) 24.8% 1201.4 MB 334 MB 40 | CONv22 231 894.3 1110.3 5556 156 (93%) 38 7% 2402.3 MB 485 MB 41 | CONV3-1 254 241 1 204.0 2.776 156 (93%) 39.7% 607.4 MB 20 2 MB 42 | CONVBVZ 235 460.9 408.1 5.556 |56 (93%) 53 |% 1214.8 MB 322 MB 43 | CONV373 233 457.7 408.1 5,556 156 (93%) 58.7% |2l4.8 MB 308 MB 44 | CoNv4-1 278 135.3 105.1 2.770 168 (100%) (74.3% 321.3 MB 17 8 MB 45 | CONV4.2 261 254 8 210 0 5 556 168 (100%) 74 7% 643 7 MB 20.6 MB 46 | CONV473 240 246.3 210.0 SSSG 168 (100%) 85.4% 643.7 MB 228 MB 47 | CONVS-l 258 54.3 48.3 1.390 163 (100%) 79.4% 90.0 MB 6 3 MB 48 | CONVSVZ 236 . .7 48 5 1,390 168 (100%) 87 4% 90 0 MB 57 MB 49 | CONV5»3 230 53.7 48.5 1,396 168 (100%) 88.5% 90.0 MB 5 6 ME 50 | Tubal 236 4309.5 3755.2 46.040 158 (94%} 58.6% [1035.8 MB 321.1 MB 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /nets/alexnet_deploy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "data", 4 | "type": "Input" 5 | }, 6 | { 7 | "macs": 105415200, 8 | "name": "conv1", 9 | "type": "Convolution" 10 | }, 11 | { 12 | "name": "relu1", 13 | "type": "ReLU" 14 | }, 15 | { 16 | "name": "norm1", 17 | "type": "LRN" 18 | }, 19 | { 20 | "name": "pool1", 21 | "type": "Pooling" 22 | }, 23 | { 24 | "macs": 223948800, 25 | "name": "conv2", 26 | "type": "Convolution" 27 | }, 28 | { 29 | "name": "relu2", 30 | "type": "ReLU" 31 | }, 32 | { 33 | "name": "norm2", 34 | "type": "LRN" 35 | }, 36 | { 37 | "name": "pool2", 38 | "type": "Pooling" 39 | }, 40 | { 41 | "macs": 149520384, 42 | "name": "conv3", 43 | "type": "Convolution" 44 | }, 45 | { 46 | 
"name": "relu3", 47 | "type": "ReLU" 48 | }, 49 | { 50 | "macs": 112140288, 51 | "name": "conv4", 52 | "type": "Convolution" 53 | }, 54 | { 55 | "name": "relu4", 56 | "type": "ReLU" 57 | }, 58 | { 59 | "macs": 74760192, 60 | "name": "conv5", 61 | "type": "Convolution" 62 | }, 63 | { 64 | "name": "relu5", 65 | "type": "ReLU" 66 | }, 67 | { 68 | "name": "pool5", 69 | "type": "Pooling" 70 | }, 71 | { 72 | "macs": 37748736, 73 | "name": "fc6", 74 | "type": "InnerProduct" 75 | }, 76 | { 77 | "name": "relu6", 78 | "type": "ReLU" 79 | }, 80 | { 81 | "name": "drop6", 82 | "type": "Dropout" 83 | }, 84 | { 85 | "macs": 16777216, 86 | "name": "fc7", 87 | "type": "InnerProduct" 88 | }, 89 | { 90 | "name": "relu7", 91 | "type": "ReLU" 92 | }, 93 | { 94 | "name": "drop7", 95 | "type": "Dropout" 96 | }, 97 | { 98 | "macs": 4096000, 99 | "name": "fc8", 100 | "type": "InnerProduct" 101 | }, 102 | { 103 | "name": "prob", 104 | "type": "Softmax" 105 | } 106 | ] 107 | -------------------------------------------------------------------------------- /nets/VGG_CNN_M_deploy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "input", 4 | "type": "Input" 5 | }, 6 | { 7 | "macs": 167664672, 8 | "name": "conv1", 9 | "type": "Convolution" 10 | }, 11 | { 12 | "name": "relu1", 13 | "type": "ReLU" 14 | }, 15 | { 16 | "name": "norm1", 17 | "type": "LRN" 18 | }, 19 | { 20 | "name": "pool1", 21 | "type": "Pooling" 22 | }, 23 | { 24 | "macs": 415334400, 25 | "name": "conv2", 26 | "type": "Convolution" 27 | }, 28 | { 29 | "name": "relu2", 30 | "type": "ReLU" 31 | }, 32 | { 33 | "name": "norm2", 34 | "type": "LRN" 35 | }, 36 | { 37 | "name": "pool2", 38 | "type": "Pooling" 39 | }, 40 | { 41 | "macs": 199360512, 42 | "name": "conv3", 43 | "type": "Convolution" 44 | }, 45 | { 46 | "name": "relu3", 47 | "type": "ReLU" 48 | }, 49 | { 50 | "macs": 398721024, 51 | "name": "conv4", 52 | "type": "Convolution" 53 | }, 54 | { 55 | "name": "relu4", 56 | 
"type": "ReLU" 57 | }, 58 | { 59 | "macs": 398721024, 60 | "name": "conv5", 61 | "type": "Convolution" 62 | }, 63 | { 64 | "name": "relu5", 65 | "type": "ReLU" 66 | }, 67 | { 68 | "name": "pool5", 69 | "type": "Pooling" 70 | }, 71 | { 72 | "macs": 75497472, 73 | "name": "fc6", 74 | "type": "InnerProduct" 75 | }, 76 | { 77 | "name": "relu6", 78 | "type": "ReLU" 79 | }, 80 | { 81 | "name": "drop6", 82 | "type": "Dropout" 83 | }, 84 | { 85 | "macs": 16777216, 86 | "name": "fc7", 87 | "type": "InnerProduct" 88 | }, 89 | { 90 | "name": "relu7", 91 | "type": "ReLU" 92 | }, 93 | { 94 | "name": "drop7", 95 | "type": "Dropout" 96 | }, 97 | { 98 | "macs": 4096000, 99 | "name": "fc8", 100 | "type": "InnerProduct" 101 | }, 102 | { 103 | "name": "prob", 104 | "type": "Softmax" 105 | } 106 | ] 107 | -------------------------------------------------------------------------------- /raw/eie-raw.csv: -------------------------------------------------------------------------------- 1 | "",Power (%)(mW),(%),Area (μm2),, 2 | Total,9.157,,"638,024",, 3 | memory,5.416,(59.15%),"594,786",,(93.22%) 4 | clock network,1.874,(20.46%),866,,(0.14%) 5 | register,1.026,(11.20%),"9,465",,(1.48%) 6 | combinational,0.841,(9.18%),"8,946",,(1.40%) 7 | filler cell,,,"23,961",,(3.76%) 8 | Act queue,0.112,(1.23%),758,,(0.12%) 9 | PtrRead,1.807,(19.73%),"121,849",,(19.10%) 10 | SpmatRead,4.955,(54.11%),"469,412",,(73.57%) 11 | ArithmUnit,1.162,(12.68%),"3,110",,(0.49%) 12 | ActRW,1.122,(12.25%),"18,934",,(2.97%) 13 | filler cell,,,"23,961",,(3.76%) 14 | NT-Platform Batch Matrix,AlexNet,VGG16,, 15 | Size Type,FC6 FC7,FC8 FC6 FC7,FC8 We Wd,LSTM 16 | 470.5CPU 1 dense,7516.2 6187.1,1134.9 35022.8 5372.8,774.2 605.0 1361.4, 17 | 260.0(Core sparse,3066.5 1282.1,890.5 3774.3 545.1,777.3 261.2 437.4, 18 | 28.8i7-5930k) 64 dense,318.4 188.9,45.8 1056.0 188.3,45.7 28.7 69.0, 19 | sparse,1417.6 682.1,407.7 1780.3 274.9,363.1 117.7 176.4,107.4 20 | 51.91GPU dense sparse,541.5 134.8 243.0 65.8,80.5 54.6 1467.8 167.0 
243.0 39.8,80.5 48.0 65 17.7 90.1 41.1,18.5 21 | "(Titan X) 2.564 dense",19.8 8.9,5.9 53.6 8.9,5.9 3.2 2.3, 22 | sparse,94.6 51.5,23.2 121.5 24.4,22.0 10.9 11.0,9.0 23 | 956.91mGPU dense sparse,12437.2 2879.3 5765.0 1256.5,2252.1 837.0 35427.0 4377.2 5544.3 626.3,2243.1 745.1 1316 240.6 2565.5 570.6,315 24 | "(Tegra K1) 95.264 dense",1663.6 2056.8,298.0 2001.4 2050.7,483.9 87.8 956.3, 25 | sparse,4003.9 1372.8,576.7 8024.8 660.2,544.1 236.3 187.7,186.5 26 | EIE Theoretical Actual Time Time,28.1 30.3 11.7 12.2,8.9 9.9 28.1 34.4 7.9 8.7,7.3 8.4 5.2 8.0 13.0 13.9,6.57.5 27 | Platform,Core-i7,GeForce,Tegra,A-Eye,Da-,True-,EIE,EIE 28 | "",5930K,Titan X,K1,[14],DianNao,North,"(ours,","(28nm," 29 | "",,,,,[11],[40],64PE),256PE) 30 | Year,2014,2015,2014,2015,2014,2014,2016,2016 31 | Platform Type,CPU,GPU,mGPU,FPGA,ASIC,ASIC,ASIC,ASIC 32 | Technology,22nm,28nm,28nm,28nm,28nm,28nm,45nm,28nm 33 | Clock (MHz),3500,1075,852,150,606,Async,800,1200 34 | Memory type,DRAM,DRAM,DRAM,DRAM,eDRAM,SRAM,SRAM,SRAM 35 | Max DNN model size (#Params),<16G,<3G,<500M,<500M,18M,256M,84M,336M 36 | Quantization Stategy,32-bit,32-bit,32-bit,16-bit,16-bit,1-bit,4-bit,4-bit 37 | "",float,float,float,fixed,fixed,fixed,fixed,fixed 38 | Area (mm2),356,601,-,-,67.7,430,40.8,63.8 39 | Power (W),73,159,5.1,9.63,15.97,0.18,0.59,2.36 40 | M×V Throughput (Frames/s),162,"4,115",173,33,"147,938","1,989","81,967","426,230" 41 | Area Efficiency ( Frames/s/mm2),0.46,6.85,-,-,"2,185",4.63,"2,009","6,681" 42 | Energy Efficiency (Frames/J),2.22,25.9,33.9,3.43,"9,263","10,839","138,927","180,606" 43 | -------------------------------------------------------------------------------- /nets/extract.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | from __future__ import division, print_function 3 | 4 | import sys 5 | import caffe 6 | import json 7 | 8 | 9 | def _blob_and_weights(net, layer_name): 10 | """Get the activation blob and the weights blob for the 
named layer 11 | in the Caffe network. 12 | """ 13 | # Get the activation blob for this layer and its parameters 14 | # (weights). 15 | blob = net.blobs[net.top_names[layer_name][0]] 16 | weights = net.params[layer_name][0] 17 | return blob, weights 18 | 19 | 20 | def extract(model_fn): 21 | """Extract per-layer cost information from a Caffe model file, given 22 | as the path to a prototxt specification. 23 | 24 | Generate a sequence of dicts with each layer's name, type, and (for 25 | some kinds of layers) the total number of multiply--accumulate 26 | operations needed for a (forward) computation of the layer. 27 | """ 28 | # Load the model from the prototxt file. 29 | net = caffe.Net(model_fn, caffe.TEST) 30 | 31 | for name, layer in zip(net._layer_names, net.layers): 32 | layer_info = { 33 | 'name': name, 34 | 'type': layer.type, 35 | } 36 | 37 | # Convolutional layers. 38 | if layer.type in ('Convolution', 'Deconvolution'): 39 | blob, weights = _blob_and_weights(net, name) 40 | 41 | # Extract relevant hyperparameters from the layer's 42 | # activation and weight buffers. 43 | layer_height = blob.shape[2] 44 | layer_width = blob.shape[3] 45 | in_chan = weights.shape[0] 46 | out_chan = weights.shape[1] 47 | kernel_height = weights.shape[2] 48 | kernel_width = weights.shape[3] 49 | 50 | # Compute the total number of multiply--accumulate 51 | # operations for this convolutional layer. 52 | num_outputs = layer_width * layer_height * out_chan 53 | num_macs_per_out = in_chan * kernel_height * kernel_width 54 | num_macs = num_outputs * num_macs_per_out 55 | 56 | layer_info['macs'] = num_macs 57 | 58 | # Fully-connected layers. 59 | elif layer.type == "InnerProduct": 60 | blob, weights = _blob_and_weights(net, name) 61 | 62 | # There is one MAC per "synapse" (i.e., each pairing of an 63 | # input neuron with an output neuron). 
64 | num_output = weights.shape[0] 65 | num_input = weights.shape[1] 66 | num_macs = num_input * num_output 67 | 68 | layer_info['macs'] = num_macs 69 | 70 | yield layer_info 71 | 72 | 73 | if __name__ == '__main__': 74 | out = list(extract(sys.argv[1])) 75 | print(json.dumps(out, indent=2, sort_keys=True)) 76 | -------------------------------------------------------------------------------- /nets/faster_rcnn_vggm.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "input", 4 | "type": "Input" 5 | }, 6 | { 7 | "macs": 167664672, 8 | "name": "conv1", 9 | "type": "Convolution" 10 | }, 11 | { 12 | "name": "relu1", 13 | "type": "ReLU" 14 | }, 15 | { 16 | "name": "norm1", 17 | "type": "LRN" 18 | }, 19 | { 20 | "name": "pool1", 21 | "type": "Pooling" 22 | }, 23 | { 24 | "macs": 415334400, 25 | "name": "conv2", 26 | "type": "Convolution" 27 | }, 28 | { 29 | "name": "relu2", 30 | "type": "ReLU" 31 | }, 32 | { 33 | "name": "norm2", 34 | "type": "LRN" 35 | }, 36 | { 37 | "name": "pool2", 38 | "type": "Pooling" 39 | }, 40 | { 41 | "macs": 199360512, 42 | "name": "conv3", 43 | "type": "Convolution" 44 | }, 45 | { 46 | "name": "relu3", 47 | "type": "ReLU" 48 | }, 49 | { 50 | "macs": 398721024, 51 | "name": "conv4", 52 | "type": "Convolution" 53 | }, 54 | { 55 | "name": "relu4", 56 | "type": "ReLU" 57 | }, 58 | { 59 | "macs": 398721024, 60 | "name": "conv5", 61 | "type": "Convolution" 62 | }, 63 | { 64 | "name": "relu5", 65 | "type": "ReLU" 66 | }, 67 | { 68 | "name": "conv5_relu5_0_split", 69 | "type": "Split" 70 | }, 71 | { 72 | "macs": 199360512, 73 | "name": "rpn_conv/3x3", 74 | "type": "Convolution" 75 | }, 76 | { 77 | "name": "rpn_relu/3x3", 78 | "type": "ReLU" 79 | }, 80 | { 81 | "name": "rpn/output_rpn_relu/3x3_0_split", 82 | "type": "Split" 83 | }, 84 | { 85 | "macs": 778752, 86 | "name": "rpn_cls_score", 87 | "type": "Convolution" 88 | }, 89 | { 90 | "macs": 1557504, 91 | "name": "rpn_bbox_pred", 92 | "type": 
"Convolution" 93 | }, 94 | { 95 | "name": "rpn_cls_score_reshape", 96 | "type": "Reshape" 97 | }, 98 | { 99 | "name": "rpn_cls_prob", 100 | "type": "Softmax" 101 | }, 102 | { 103 | "name": "rpn_cls_prob_reshape", 104 | "type": "Reshape" 105 | }, 106 | { 107 | "name": "proposal", 108 | "type": "Python" 109 | }, 110 | { 111 | "name": "roi_pool5", 112 | "type": "ROIPooling" 113 | }, 114 | { 115 | "macs": 75497472, 116 | "name": "fc6", 117 | "type": "InnerProduct" 118 | }, 119 | { 120 | "name": "relu6", 121 | "type": "ReLU" 122 | }, 123 | { 124 | "name": "drop6", 125 | "type": "Dropout" 126 | }, 127 | { 128 | "macs": 4194304, 129 | "name": "fc7", 130 | "type": "InnerProduct" 131 | }, 132 | { 133 | "name": "relu7", 134 | "type": "ReLU" 135 | }, 136 | { 137 | "name": "drop7", 138 | "type": "Dropout" 139 | }, 140 | { 141 | "name": "fc7_drop7_0_split", 142 | "type": "Split" 143 | }, 144 | { 145 | "macs": 21504, 146 | "name": "cls_score", 147 | "type": "InnerProduct" 148 | }, 149 | { 150 | "macs": 86016, 151 | "name": "bbox_pred", 152 | "type": "InnerProduct" 153 | }, 154 | { 155 | "name": "cls_prob", 156 | "type": "Softmax" 157 | } 158 | ] 159 | -------------------------------------------------------------------------------- /nets/VGG_ILSVRC_16_layers_deploy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "input", 4 | "type": "Input" 5 | }, 6 | { 7 | "macs": 86704128, 8 | "name": "conv1_1", 9 | "type": "Convolution" 10 | }, 11 | { 12 | "name": "relu1_1", 13 | "type": "ReLU" 14 | }, 15 | { 16 | "macs": 1849688064, 17 | "name": "conv1_2", 18 | "type": "Convolution" 19 | }, 20 | { 21 | "name": "relu1_2", 22 | "type": "ReLU" 23 | }, 24 | { 25 | "name": "pool1", 26 | "type": "Pooling" 27 | }, 28 | { 29 | "macs": 924844032, 30 | "name": "conv2_1", 31 | "type": "Convolution" 32 | }, 33 | { 34 | "name": "relu2_1", 35 | "type": "ReLU" 36 | }, 37 | { 38 | "macs": 1849688064, 39 | "name": "conv2_2", 40 | "type": 
"Convolution" 41 | }, 42 | { 43 | "name": "relu2_2", 44 | "type": "ReLU" 45 | }, 46 | { 47 | "name": "pool2", 48 | "type": "Pooling" 49 | }, 50 | { 51 | "macs": 924844032, 52 | "name": "conv3_1", 53 | "type": "Convolution" 54 | }, 55 | { 56 | "name": "relu3_1", 57 | "type": "ReLU" 58 | }, 59 | { 60 | "macs": 1849688064, 61 | "name": "conv3_2", 62 | "type": "Convolution" 63 | }, 64 | { 65 | "name": "relu3_2", 66 | "type": "ReLU" 67 | }, 68 | { 69 | "macs": 1849688064, 70 | "name": "conv3_3", 71 | "type": "Convolution" 72 | }, 73 | { 74 | "name": "relu3_3", 75 | "type": "ReLU" 76 | }, 77 | { 78 | "name": "pool3", 79 | "type": "Pooling" 80 | }, 81 | { 82 | "macs": 924844032, 83 | "name": "conv4_1", 84 | "type": "Convolution" 85 | }, 86 | { 87 | "name": "relu4_1", 88 | "type": "ReLU" 89 | }, 90 | { 91 | "macs": 1849688064, 92 | "name": "conv4_2", 93 | "type": "Convolution" 94 | }, 95 | { 96 | "name": "relu4_2", 97 | "type": "ReLU" 98 | }, 99 | { 100 | "macs": 1849688064, 101 | "name": "conv4_3", 102 | "type": "Convolution" 103 | }, 104 | { 105 | "name": "relu4_3", 106 | "type": "ReLU" 107 | }, 108 | { 109 | "name": "pool4", 110 | "type": "Pooling" 111 | }, 112 | { 113 | "macs": 462422016, 114 | "name": "conv5_1", 115 | "type": "Convolution" 116 | }, 117 | { 118 | "name": "relu5_1", 119 | "type": "ReLU" 120 | }, 121 | { 122 | "macs": 462422016, 123 | "name": "conv5_2", 124 | "type": "Convolution" 125 | }, 126 | { 127 | "name": "relu5_2", 128 | "type": "ReLU" 129 | }, 130 | { 131 | "macs": 462422016, 132 | "name": "conv5_3", 133 | "type": "Convolution" 134 | }, 135 | { 136 | "name": "relu5_3", 137 | "type": "ReLU" 138 | }, 139 | { 140 | "name": "pool5", 141 | "type": "Pooling" 142 | }, 143 | { 144 | "macs": 102760448, 145 | "name": "fc6", 146 | "type": "InnerProduct" 147 | }, 148 | { 149 | "name": "relu6", 150 | "type": "ReLU" 151 | }, 152 | { 153 | "name": "drop6", 154 | "type": "Dropout" 155 | }, 156 | { 157 | "macs": 16777216, 158 | "name": "fc7", 159 | "type": 
"InnerProduct" 160 | }, 161 | { 162 | "name": "relu7", 163 | "type": "ReLU" 164 | }, 165 | { 166 | "name": "drop7", 167 | "type": "Dropout" 168 | }, 169 | { 170 | "macs": 4096000, 171 | "name": "fc8", 172 | "type": "InnerProduct" 173 | }, 174 | { 175 | "name": "prob", 176 | "type": "Softmax" 177 | } 178 | ] 179 | -------------------------------------------------------------------------------- /nets/faster_rcnn_vgg16.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "input", 4 | "type": "Input" 5 | }, 6 | { 7 | "macs": 86704128, 8 | "name": "conv1_1", 9 | "type": "Convolution" 10 | }, 11 | { 12 | "name": "relu1_1", 13 | "type": "ReLU" 14 | }, 15 | { 16 | "macs": 1849688064, 17 | "name": "conv1_2", 18 | "type": "Convolution" 19 | }, 20 | { 21 | "name": "relu1_2", 22 | "type": "ReLU" 23 | }, 24 | { 25 | "name": "pool1", 26 | "type": "Pooling" 27 | }, 28 | { 29 | "macs": 924844032, 30 | "name": "conv2_1", 31 | "type": "Convolution" 32 | }, 33 | { 34 | "name": "relu2_1", 35 | "type": "ReLU" 36 | }, 37 | { 38 | "macs": 1849688064, 39 | "name": "conv2_2", 40 | "type": "Convolution" 41 | }, 42 | { 43 | "name": "relu2_2", 44 | "type": "ReLU" 45 | }, 46 | { 47 | "name": "pool2", 48 | "type": "Pooling" 49 | }, 50 | { 51 | "macs": 924844032, 52 | "name": "conv3_1", 53 | "type": "Convolution" 54 | }, 55 | { 56 | "name": "relu3_1", 57 | "type": "ReLU" 58 | }, 59 | { 60 | "macs": 1849688064, 61 | "name": "conv3_2", 62 | "type": "Convolution" 63 | }, 64 | { 65 | "name": "relu3_2", 66 | "type": "ReLU" 67 | }, 68 | { 69 | "macs": 1849688064, 70 | "name": "conv3_3", 71 | "type": "Convolution" 72 | }, 73 | { 74 | "name": "relu3_3", 75 | "type": "ReLU" 76 | }, 77 | { 78 | "name": "pool3", 79 | "type": "Pooling" 80 | }, 81 | { 82 | "macs": 924844032, 83 | "name": "conv4_1", 84 | "type": "Convolution" 85 | }, 86 | { 87 | "name": "relu4_1", 88 | "type": "ReLU" 89 | }, 90 | { 91 | "macs": 1849688064, 92 | "name": "conv4_2", 93 | 
"type": "Convolution" 94 | }, 95 | { 96 | "name": "relu4_2", 97 | "type": "ReLU" 98 | }, 99 | { 100 | "macs": 1849688064, 101 | "name": "conv4_3", 102 | "type": "Convolution" 103 | }, 104 | { 105 | "name": "relu4_3", 106 | "type": "ReLU" 107 | }, 108 | { 109 | "name": "pool4", 110 | "type": "Pooling" 111 | }, 112 | { 113 | "macs": 462422016, 114 | "name": "conv5_1", 115 | "type": "Convolution" 116 | }, 117 | { 118 | "name": "relu5_1", 119 | "type": "ReLU" 120 | }, 121 | { 122 | "macs": 462422016, 123 | "name": "conv5_2", 124 | "type": "Convolution" 125 | }, 126 | { 127 | "name": "relu5_2", 128 | "type": "ReLU" 129 | }, 130 | { 131 | "macs": 462422016, 132 | "name": "conv5_3", 133 | "type": "Convolution" 134 | }, 135 | { 136 | "name": "relu5_3", 137 | "type": "ReLU" 138 | }, 139 | { 140 | "name": "conv5_3_relu5_3_0_split", 141 | "type": "Split" 142 | }, 143 | { 144 | "macs": 462422016, 145 | "name": "rpn_conv/3x3", 146 | "type": "Convolution" 147 | }, 148 | { 149 | "name": "rpn_relu/3x3", 150 | "type": "ReLU" 151 | }, 152 | { 153 | "name": "rpn/output_rpn_relu/3x3_0_split", 154 | "type": "Split" 155 | }, 156 | { 157 | "macs": 1806336, 158 | "name": "rpn_cls_score", 159 | "type": "Convolution" 160 | }, 161 | { 162 | "macs": 3612672, 163 | "name": "rpn_bbox_pred", 164 | "type": "Convolution" 165 | }, 166 | { 167 | "name": "rpn_cls_score_reshape", 168 | "type": "Reshape" 169 | }, 170 | { 171 | "name": "rpn_cls_prob", 172 | "type": "Softmax" 173 | }, 174 | { 175 | "name": "rpn_cls_prob_reshape", 176 | "type": "Reshape" 177 | }, 178 | { 179 | "name": "proposal", 180 | "type": "Python" 181 | }, 182 | { 183 | "name": "roi_pool5", 184 | "type": "ROIPooling" 185 | }, 186 | { 187 | "macs": 102760448, 188 | "name": "fc6", 189 | "type": "InnerProduct" 190 | }, 191 | { 192 | "name": "relu6", 193 | "type": "ReLU" 194 | }, 195 | { 196 | "name": "drop6", 197 | "type": "Dropout" 198 | }, 199 | { 200 | "macs": 16777216, 201 | "name": "fc7", 202 | "type": "InnerProduct" 203 | }, 204 | 
{ 205 | "name": "relu7", 206 | "type": "ReLU" 207 | }, 208 | { 209 | "name": "drop7", 210 | "type": "Dropout" 211 | }, 212 | { 213 | "name": "fc7_drop7_0_split", 214 | "type": "Split" 215 | }, 216 | { 217 | "macs": 86016, 218 | "name": "cls_score", 219 | "type": "InnerProduct" 220 | }, 221 | { 222 | "macs": 344064, 223 | "name": "bbox_pred", 224 | "type": "InnerProduct" 225 | }, 226 | { 227 | "name": "cls_prob", 228 | "type": "Softmax" 229 | } 230 | ] 231 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | First-Order Deep Learning Accelerator Model (FODLAM) 2 | ==================================================== 3 | 4 | FODLAM is a quick, easy model for the power and performance of modern hardware implementations of deep neural networks. It is based on published numbers from two papers: 5 | 6 | * ["EIE: Efficient Inference Engine on Compressed Deep Neural Network."](https://arxiv.org/pdf/1602.01528.pdf) 7 | Song Han, Xingyu Liu, Huizi Mao, Jing Pu, Ardavan Pedram, Mark A. Horowitz, and William J. Dally. 8 | In ISCA 2016. 9 | * ["Eyeriss: An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks."](http://www.rle.mit.edu/eems/wp-content/uploads/2016/04/eyeriss_isca_2016.pdf) 10 | Yu-Hsin Chen, Tushar Krishna, Joel S. Emer, and Vivienne Sze. 11 | In J. Solid-State Circuits, January 2017. 12 | 13 | EIE provides the fully-connected layers; Eyeriss provides the convolutional layers. FODLAM only supports these two kinds of layers. 14 | 15 | 16 | Running the Model 17 | ----------------- 18 | 19 | FODLAM is a Python 3 program. It has no other dependencies. 20 | 21 | To specify a DNN, create a JSON file containing two keys: 22 | 23 | * Choose one of these two options to select a network to draw layers from: 24 | * `net`: A built-in network name, either `"VGG16"` or `"AlexNet"`. 
FODLAM will use precise published numbers. 25 | * `netfile`: The name of a JSON file in the `nets/` directory that describes any CNN. FODLAM will approximate layer costs using scaling. 26 | * `layers`: A list of layer names to enable. 27 | 28 | You can see examples in `config/`. 29 | 30 | Run FODLAM by piping in a configuration file, like this: 31 | 32 | $ python3 fodlam.py < config/vgg16.json 33 | { 34 | "conv": { 35 | "energy": 1.0162585, 36 | "latency": 4.3094 37 | }, 38 | "fc": { 39 | "energy": 9.157180384087789e-05, 40 | "latency": 7.438888888888888e-05 41 | }, 42 | "total": { 43 | "energy": 1.016350071803841, 44 | "latency": 4.309474388888889 45 | } 46 | } 47 | 48 | The results are printed as JSON to stdout. The output consists of the total energy in joules and total latency in seconds. The output includes the total for the entire network, just the convolutional layers, and just the fully-connected layers. 49 | 50 | ### Providing a Network 51 | 52 | FODLAM ships with statistics for a few popular neural networks as JSON files under the `nets/` directory. Each JSON file describes the total computational cost of each layer in the network. 53 | 54 | To provide a new network specification, you need to produce a similar JSON file. FODLAM has a tool that can extract these statistics from Caffe models, but unlike FODLAM itself, this tool requires a working Caffe installation. (You can even use a funky hacked-up alternative version of Caffe, such as [the one for Fast and Faster R-CNN][caffe-fast-rcnn].) See the Makefile in that directory for tips on how to extract a JSON statistics file from your network specification. 55 | 56 | [caffe-fast-rcnn]: https://github.com/rbgirshick/caffe-fast-rcnn 57 | 58 | 59 | How it Works 60 | ------------ 61 | 62 | The model just totals up the latency and energy for each layer in a given configuration. Because both of the source papers measure AlexNet and VGG-16, layers from those networks are supported directly.
For other layers, FODLAM can scale the data from those networks. 63 | 64 | ### Process Normalization 65 | 66 | Because Eyeriss and EIE were evaluated on different process technologies, we have to scale one of them to model a single ASIC. Specifically, Eyeriss is on TSMC 65nm and EIE is on TSMC 45nm; we normalize to 65nm. This works by multiplying EIE time by the scaling factor and multiplying the power by the square of the scaling factor---i.e., Dennard scaling, which is admittedly retro. 67 | 68 | ### Power 69 | 70 | While the Eyeriss paper reports per-layer power, the EIE paper does not. Instead, this is how energy is computed (quoting from the paper): 71 | 72 | > Energy is obtained by multiplying computation time and total measured power... 73 | 74 | So the authors assume that power is constant across layers. FODLAM applies the same assumption to compute EIE layer energy. 75 | 76 | ### New Layers 77 | 78 | To estimate the costs for new layer configurations not found in AlexNet or VGG-16, FODLAM can scale the numbers from those networks. Scaling works by getting the number of multiply--accumulate (MAC) operations required to compute each layer. We compute the average cost per MAC among layers of the same type and use that to estimate the cost of a new layer. 79 | 80 | The assumption underlying this scaling technique is that the cost per MAC is close to constant across layers of varying shape. To validate this hypothesis, run FODLAM in diagnosis mode: 81 | 82 | $ python3 fodlam.py --diagnose 83 | 84 | FODLAM will print out the energy and latency per MAC for each layer. Notice that the cost per MAC is different for convolutional and fully-connected layers, but it varies by less than an order of magnitude within each layer type. 85 | 86 | 87 | Data Extraction 88 | --------------- 89 | 90 | To make FODLAM, I extracted raw data from tables in the papers. The raw text files from this extraction are in `raw/`. 
91 | 92 | * For EIE, I first used [Tabula][] to extract unstructured CSV data. I extracted tables II, IV, and V. (Table III was not referenced in the text; it just seems to characterize the benchmarks.) 93 | * In the Eyeriss journal paper, the PDF does not have text embedded for the tables. I extracted images of tables III through VI and OCR'd them with [Tesseract][]. There were a lot of errors. 94 | 95 | I then cleaned up the relevant data by hand. The cleaned-up CSVs that FODLAM uses are in `data/`. 96 | 97 | [tabula]: http://tabula.technology 98 | [tesseract]: https://github.com/tesseract-ocr/tesseract 99 | 100 | 101 | Credits 102 | ------- 103 | 104 | This is a research artifact from [Capra][] at Cornell. The license is [MIT][]. If you use FODLAM in a research paper, please cite it: 105 | 106 | @misc{fodlam, 107 | title={{FODLAM}, a first-order deep learning accelerator model}, 108 | author={Adrian Sampson and Mark Buckler}, 109 | note={\url{https://github.com/cucapra/fodlam}. Commit XXX.}, 110 | } 111 | 112 | You can replace that XXX with the Git commit hash for the version of FODLAM you used to help others reproduce your work. 113 | 114 | [capra]: https://capra.cs.cornell.edu 115 | [mit]: https://opensource.org/licenses/MIT 116 | -------------------------------------------------------------------------------- /fodlam.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import division, print_function 3 | 4 | import os 5 | import csv 6 | import json 7 | import sys 8 | from collections import namedtuple 9 | 10 | # The root directory where our data files are. 11 | BASE_DIR = os.path.dirname(__file__) 12 | 13 | # The networks that our accelerators have measurements for. 14 | NETWORKS = ('VGG16', 'AlexNet') 15 | LAYER_KINDS = ('conv', 'fc') 16 | 17 | # Accelerator data files. 
DATA_DIR = os.path.join(BASE_DIR, 'data')
EIE_FILE = 'eie-layers.csv'
EYERISS_FILES = {
    'VGG16': 'eyeriss-vgg16.csv',
    'AlexNet': 'eyeriss-alexnet.csv',
}

# EIE reports latencies in microseconds; Eyeriss in milliseconds. Eyeriss
# reports per-layer power in milliwatts. These factors convert the raw
# table values to SI base units (seconds and watts).
EIE_TIME_UNIT = 10 ** (-6)
EYERISS_TIME_UNIT = 10 ** (-3)
EYERISS_POWER_UNIT = 10 ** (-3)

# Process nodes for published implementations. Both use TSMC processes.
EIE_PROCESS_NM = 45
EYERISS_PROCESS_NM = 65

# EIE reports only a total design power (in watts), not per-layer power.
EIE_POWER = 0.59

# Data files with neural network statistics.
NETS_DIR = os.path.join(BASE_DIR, 'nets')
NET_FILES = {
    'VGG16': 'VGG_ILSVRC_16_layers_deploy.json',
    'AlexNet': 'alexnet_deploy.json',
}

# Caffe's names for the layer kinds FODLAM can model.
CAFFE_KINDS = {
    "InnerProduct": "fc",
    "Convolution": "conv",
    "Deconvolution": "conv",
}

# Two kinds of layer specs. Lookup layers are precise; we just need to
# look up their costs from the base data. Scale layers are
# approximations; we need to use the average cost per MAC.
LookupLayer = namedtuple('LookupLayer', ['net', 'layer'])
ScaleLayer = namedtuple('ScaleLayer', ['kind', 'macs'])


def load_hw_data():
    """Load the published numbers from our data files.

    Return a dict with base values reflecting EIE and Eyeriss layer
    costs: the 'eie' value maps (network, layer) pairs to latencies in
    seconds; the 'eyeriss' value holds per-layer total latency,
    processing latency (both in seconds), and power (in watts) mappings.
    """
    # Load EIE data (latency only; the EIE paper reports no
    # per-layer power).
    eie_latencies = {}
    with open(os.path.join(DATA_DIR, EIE_FILE)) as f:
        reader = csv.DictReader(f)
        for row in reader:
            if row['Layer'] == 'Actual Time':
                for k, v in row.items():
                    # The table has the network and the layer name
                    # together in one cell (e.g. "VGG16 FC6").
                    if ' ' in k:
                        network, layer = k.split()
                        if network in NETWORKS:
                            eie_latencies[network, layer] = \
                                float(v) * EIE_TIME_UNIT

    # Load Eyeriss data (latency and energy).
    eyeriss = {
        'latency_total': {},
        'latency_proc': {},
        'power': {},
    }
    for network in NETWORKS:
        with open(os.path.join(DATA_DIR, EYERISS_FILES[network])) as f:
            reader = csv.DictReader(f)
            for row in reader:
                layer = row['Layer']
                # Skip the summary row; we only want per-layer numbers.
                if layer == 'Total':
                    continue
                eyeriss['latency_total'][network, layer] = \
                    float(row['Total Latency (ms)']) * EYERISS_TIME_UNIT
                eyeriss['latency_proc'][network, layer] = \
                    float(row['Processing Latency (ms)']) * EYERISS_TIME_UNIT
                eyeriss['power'][network, layer] = \
                    float(row['Power (mW)']) * EYERISS_POWER_UNIT

    return { 'eie': eie_latencies, 'eyeriss': eyeriss }


def layer_costs(published):
    """Get the latencies (in seconds) and power (in watts) for *all*
    layers in the measured networks by combining EIE and Eyeriss data.

    Return a (latency, power) pair of mappings keyed by
    (network, layer).
    """
    eie_lat = published['eie']
    eyeriss_lat = published['eyeriss']['latency_total']
    eyeriss_pow = published['eyeriss']['power']

    # Process scaling factor between Eyeriss and EIE. We scale the EIE
    # numbers because the magnitudes for Eyeriss are more significant
    # and the paper has a more complete evaluation.
    proc_scale = EYERISS_PROCESS_NM / EIE_PROCESS_NM
    # Dennard scaling: time scales linearly with feature size; power
    # scales with its square.
    eie_lat_scaled = { k: v * proc_scale for k, v in eie_lat.items() }
    eie_power_scaled = EIE_POWER * (proc_scale ** 2)

    # Combine the latencies for all the layers.
    latency = dict(eie_lat_scaled)
    latency.update(eyeriss_lat)

    # For Eyeriss, we have per-layer power numbers. For EIE, from the paper:
    # "Energy is obtained by multiplying computation time and total measured
    # power". So we follow their lead and assume constant power.
    power = { k: eie_power_scaled for k in eie_lat }
    power.update(eyeriss_pow)

    return latency, power


def norm_layer_name(name):
    """Some heuristics to normalize a layer name from multiple sources.

    For example, some depictions of VGG-16 use upper case; others
    use lower case. Some use hyphens; others use underscores. These
    heuristics are by no means complete, but they increase the
    likelihood that layer names from multiple sources will align.
    """
    return name.upper().replace('_', '-')


def load_net(filename):
    """Load layer statistics for a single network from a JSON file.
    Return a mapping from normalized layer names to ScaleLayer tuples.

    Layers without a 'macs' entry (ReLU, pooling, etc.) carry no
    modeled cost and are omitted.
    """
    with open(os.path.join(NETS_DIR, filename)) as f:
        layers = json.load(f)

    # Flatten the list of layer statistics dictionaries into a
    # name-to-number mapping.
    out = {}
    for layer in layers:
        if 'macs' in layer:
            name = norm_layer_name(layer['name'])
            kind = CAFFE_KINDS[layer['type']]
            out[name] = ScaleLayer(kind, layer['macs'])
    return out


def load_net_data():
    """Load statistics about the neural networks from our description
    files. Return a mapping from network names to mappings from layer
    names to ScaleLayers.
    """
    return { network: load_net(filename)
             for network, filename in NET_FILES.items() }


def scaling_ratios(net_data, costs):
    """Get the scaling ratio---the cost per MAC---for convolutional and
    fully-connected layers with the given cost set.
    """
    # Total numerators and denominators.
    totals = {
        'conv': { 'cost': 0, 'macs': 0 },
        'fc': { 'cost': 0, 'macs': 0 },
    }

    # Sum up the cost and MAC counts for each layer type.
    for net, layer_stats in net_data.items():
        for layer, stats in layer_stats.items():
            cost = costs[net, layer]
            totals[stats.kind]['macs'] += stats.macs
            totals[stats.kind]['cost'] += cost

    # Return ratios.
    return { k: v['cost'] / v['macs'] for k, v in totals.items() }


def dict_product(a, b):
    """Pointwise-multiply the values in two dicts with identical sets of
    keys.
    """
    assert set(a.keys()) == set(b.keys())
    return { k: v * b[k] for k, v in a.items() }


def load_config(config_file):
    """Load a neural network configuration from a file-like object.

    Return a list of enabled layers, which are instances of either
    `LookupLayer` or `ScaleLayer`. Raise ValueError if the
    configuration selects neither a built-in network nor a net file.
    """
    config = json.load(config_file)
    if "net" in config:
        # A "built-in" (precise) network.
        return [LookupLayer(config["net"], norm_layer_name(l))
                for l in config['layers']]

    elif "netfile" in config:
        # A "new" (scaled) network. Load the statistics for this network
        # from its file.
        net_stats = load_net(config["netfile"])
        return [net_stats[norm_layer_name(l)] for l in config['layers']]

    else:
        raise ValueError('configuration must contain "net" or "netfile"')


def load_params():
    """Load and set up all the parameters for the model.

    Return the latency and energy cost mappings and the network shape
    statistics.
    """
    # Load the hardware cost data.
    published_data = load_hw_data()
    latency, power = layer_costs(published_data)
    energy = dict_product(latency, power)

    # Load the network information.
    net_data = load_net_data()

    return latency, energy, net_data


def layer_kind(name):
    """Return a short string indicating the kind of the named layer.

    Raise ValueError for a name that is neither convolutional ('CONV*')
    nor fully connected ('FC*').
    """
    if name.startswith('CONV'):
        return 'conv'
    elif name.startswith('FC'):
        return 'fc'
    raise ValueError('unknown layer kind for {}'.format(name))


def model(config_file):
    """Run the model for a configuration given in the specified file.

    Return a dict with 'conv', 'fc', and 'total' keys, each mapping to
    an energy (joules) and latency (seconds) pair.
    """
    latency, energy, net_data = load_params()
    latency_ratios = scaling_ratios(net_data, latency)
    energy_ratios = scaling_ratios(net_data, energy)

    # Load the configuration we're modeling.
    layers = load_config(config_file)

    # Initialize accumulators.
    totals = {}
    for kind in LAYER_KINDS:
        totals[kind] = { 'energy': 0.0, 'latency': 0.0 }

    # Add the cost for each layer.
    for layer in layers:
        if isinstance(layer, LookupLayer):
            # Use the built-in numbers for this layer.
            kind = layer_kind(layer.layer)
            totals[kind]['energy'] += energy[layer]
            totals[kind]['latency'] += latency[layer]

        elif isinstance(layer, ScaleLayer):
            # Scale the average costs. Use distinct local names so we
            # don't clobber the `energy` and `latency` cost mappings
            # loaded above.
            layer_energy = energy_ratios[layer.kind] * layer.macs
            layer_latency = latency_ratios[layer.kind] * layer.macs
            totals[layer.kind]['energy'] += layer_energy
            totals[layer.kind]['latency'] += layer_latency

    # Grand totals.
    totals['total'] = {
        'energy': sum(totals[k]['energy'] for k in LAYER_KINDS),
        'latency': sum(totals[k]['latency'] for k in LAYER_KINDS),
    }

    return totals


def diagnose_scaled_cost(net_data, costs):
    """Get information for diagnosing FODLAM's scaling logic for a
    particular cost dimension.

    For the given cost mapping, return the cost per MAC of each layer
    for each network.
    """
    out = {}
    for net, layer_stats in net_data.items():
        net_costs = {}
        for layer, stats in layer_stats.items():
            cost = costs[net, layer]
            # `stats` is a ScaleLayer; divide by its MAC count.
            net_costs[layer] = cost / stats.macs
        out[net] = net_costs
    return out


def diagnose_scaling():
    """Get per-MAC costs for the latency and energy of each layer and
    overall averages.
    """
    latency, energy, net_data = load_params()
    return {
        'per_layer': {
            'latency': diagnose_scaled_cost(net_data, latency),
            'energy': diagnose_scaled_cost(net_data, energy),
        },
        'average': {
            'latency': scaling_ratios(net_data, latency),
            'energy': scaling_ratios(net_data, energy),
        },
    }


if __name__ == '__main__':
    if sys.argv[1:] and sys.argv[1] == '--diagnose':
        out = diagnose_scaling()
    else:
        out = model(sys.stdin)
    print(json.dumps(out, sort_keys=True, indent=2))