├── nets ├── .gitignore ├── README.md ├── Makefile ├── alexnet_deploy.json ├── VGG_CNN_M_deploy.json ├── extract.py ├── faster_rcnn_vggm.json ├── VGG_ILSVRC_16_layers_deploy.json └── faster_rcnn_vgg16.json ├── config ├── vgg16-fconly.json ├── alexnet.json ├── vgg-m.json ├── alexnet-scale.json ├── faster-rcnn-vgg-m.json ├── vgg16-after-pool1.json ├── vgg16.json ├── vgg16-scale.json └── faster-rcnn-vgg16.json ├── data ├── eie-layers.csv ├── eyeriss-alexnet.csv └── eyeriss-vgg16.csv ├── raw ├── eyeriss-tableV.txt ├── eyeriss-tableIII.txt ├── eyeriss-tableIV.txt ├── eyeriss-tableVI.txt └── eie-raw.csv ├── LICENSE ├── README.md └── fodlam.py /nets/.gitignore: -------------------------------------------------------------------------------- 1 | caffe.proto 2 | caffe_pb2.py 3 | *.prototxt 4 | -------------------------------------------------------------------------------- /config/vgg16-fconly.json: -------------------------------------------------------------------------------- 1 | { 2 | "net": "VGG16", 3 | "layers": [ 4 | "FC6", 5 | "FC7", 6 | "FC8" 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /data/eie-layers.csv: -------------------------------------------------------------------------------- 1 | Layer,AlexNet FC6,AlexNet FC7,AlexNet FC8,VGG16 FC6,VGG16 FC7,VGG16 FC8,NT- We,NT- Wd,NT- LSTM 2 | Theoretical Time,28.1,11.7,8.9,28.1,7.9,7.3,5.2,13.0,6.5 3 | Actual Time,30.3,12.2,9.9,34.4,8.7,8.4,8.0,13.9,7.5 4 | -------------------------------------------------------------------------------- /config/alexnet.json: -------------------------------------------------------------------------------- 1 | { 2 | "net": "AlexNet", 3 | "layers": [ 4 | "CONV1", 5 | "CONV2", 6 | "CONV3", 7 | "CONV4", 8 | "CONV5", 9 | "FC6", 10 | "FC7", 11 | "FC8" 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /config/vgg-m.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "netfile": "VGG_CNN_M_deploy.json", 3 | "layers": [ 4 | "conv1", 5 | "conv2", 6 | "conv3", 7 | "conv4", 8 | "conv5", 9 | "fc6", 10 | "fc7", 11 | "fc8" 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /config/alexnet-scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "netfile": "alexnet_deploy.json", 3 | "layers": [ 4 | "CONV1", 5 | "CONV2", 6 | "CONV3", 7 | "CONV4", 8 | "CONV5", 9 | "FC6", 10 | "FC7", 11 | "FC8" 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /nets/README.md: -------------------------------------------------------------------------------- 1 | FODLAM Network Statistics 2 | ========================= 3 | 4 | This directory contains cost statistics extracted from network specifications from around the Web. 5 | 6 | The directory also contains the infrastructure used to fetch the data and perform the configuration, but the JSON files are included here to make FODLAM self-contained. 
7 | -------------------------------------------------------------------------------- /config/faster-rcnn-vgg-m.json: -------------------------------------------------------------------------------- 1 | { 2 | "netfile": "faster_rcnn_vggm.json", 3 | "layers": [ 4 | "conv1", 5 | "conv2", 6 | "conv3", 7 | "conv4", 8 | "conv5", 9 | "rpn_conv/3x3", 10 | "rpn_cls_score", 11 | "rpn_bbox_pred", 12 | "fc6", 13 | "fc7", 14 | "cls_score", 15 | "bbox_pred" 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /config/vgg16-after-pool1.json: -------------------------------------------------------------------------------- 1 | { 2 | "net": "VGG16", 3 | "layers": [ 4 | "CONV2-1", 5 | "CONV2-2", 6 | "CONV3-1", 7 | "CONV3-2", 8 | "CONV3-3", 9 | "CONV4-1", 10 | "CONV4-2", 11 | "CONV4-3", 12 | "CONV5-1", 13 | "CONV5-2", 14 | "CONV5-3", 15 | "FC6", 16 | "FC7", 17 | "FC8" 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /config/vgg16.json: -------------------------------------------------------------------------------- 1 | { 2 | "net": "VGG16", 3 | "layers": [ 4 | "CONV1-1", 5 | "CONV1-2", 6 | "CONV2-1", 7 | "CONV2-2", 8 | "CONV3-1", 9 | "CONV3-2", 10 | "CONV3-3", 11 | "CONV4-1", 12 | "CONV4-2", 13 | "CONV4-3", 14 | "CONV5-1", 15 | "CONV5-2", 16 | "CONV5-3", 17 | "FC6", 18 | "FC7", 19 | "FC8" 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /config/vgg16-scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "netfile": "VGG_ILSVRC_16_layers_deploy.json", 3 | "layers": [ 4 | "CONV1-1", 5 | "CONV1-2", 6 | "CONV2-1", 7 | "CONV2-2", 8 | "CONV3-1", 9 | "CONV3-2", 10 | "CONV3-3", 11 | "CONV4-1", 12 | "CONV4-2", 13 | "CONV4-3", 14 | "CONV5-1", 15 | "CONV5-2", 16 | "CONV5-3", 17 | "FC6", 18 | "FC7", 19 | "FC8" 20 | ] 21 | } 22 | 
-------------------------------------------------------------------------------- /data/eyeriss-alexnet.csv: -------------------------------------------------------------------------------- 1 | Layer,Power (mW),Total Latency (ms),Processing Latency (ms),Num. of MACs (G),Num. of Active PEs,Zeros in Ifmaps,Global Buffer Accesses (MB),DRAM Accesses (MB) 2 | CONV1,332,20.9 ,16.5, 0.42,154,0.01%, 18.5, 5.0 3 | CONV2,288,41.9 ,39.2, 0.90,135,38.7%, 77.6, 4.0 4 | CONV3,266,23.6 ,21.8, 0.60,156,72.5%, 50.2, 3.0 5 | CONV4,235,18.4 ,16.0, 0.45,156,79.3%, 37.4, 2.1 6 | CONV5,236,10.5 ,10.0, 0.30,156,77.6%, 24.9, 1.3 7 | Total,278,115.3,103.5,2.66,148,57.53%,208.5,15.4 8 | -------------------------------------------------------------------------------- /config/faster-rcnn-vgg16.json: -------------------------------------------------------------------------------- 1 | { 2 | "netfile": "faster_rcnn_vgg16.json", 3 | "layers": [ 4 | "conv1_1", 5 | "conv1_2", 6 | "conv2_1", 7 | "conv2_2", 8 | "conv3_1", 9 | "conv3_2", 10 | "conv3_3", 11 | "conv4_1", 12 | "conv4_2", 13 | "conv4_3", 14 | "conv5_1", 15 | "conv5_2", 16 | "conv5_3", 17 | "rpn_conv/3x3", 18 | "rpn_cls_score", 19 | "rpn_bbox_pred", 20 | "fc6", 21 | "fc7", 22 | "cls_score", 23 | "bbox_pred" 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /raw/eyeriss-tableV.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | Lav" iner 10ml Proceseing Num. of Num. «1' Zem in Glnhnl Bufl'. DRAM 18 | 19 | ’ (mW) Lamncy (Ills) Latency (Ills) MACS Active PEs Ifmaps (%) Accesses Awesses 20 | 21 | CONVI 332 209 165 0420 154(92%) (mm; 18.5 MB 50 MB 22 | CONVZ 288 41.9 39 2 0.906 135 (80%) 38.7% 77.6 MB 23 | CONV3 266 23.6 21.3 0.600 156 (93%) 72. 
% 5 2 MB 24 | CONV4 235 18.4 16.0 0.455 156 (93%) 79.3 70 37 4 MB 25 | CONVS 236 10.5 10.0 0.300 [56 (93%) 77.6% 24 9 MB 26 | Total 278 115.3 1035 2.666 143 (88%) 5753% 208.5 MB 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /raw/eyeriss-tableIII.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | CNN Shape Parameom 4 | 5 | RS Datafluw Mapping Paramenm 6 | 7 | Global Buffer Allocation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | I‘m' H/W‘ R/S E/F C M U m n e p q r 2 ifmup psum 54 | CONVI 227 11 55 3 96 4 96 1 7 16 1 1 2 155103 721103 55 | CONVZ 3| 5 27 48 256 I 64 l 27 16 2 l l 3,8KE 9LIKB 56 | CONva 15 3 13 256 334 1 64 4 13 16 4 1 4 70103 345105 57 | CONVA 15 3 13 192 3134 1 64 4 13 16 3 2 2 1115103 84.5KB 58 | CONVS 15 3 13 192 256 1 64 4 13 16 3 2 2 105105 345103 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /data/eyeriss-vgg16.csv: -------------------------------------------------------------------------------- 1 | Layer,Power (mW),Total Latency (ms),Processing Latency (ms),MACs (G),PEs,Zeros,Global Buffer Accesses (MB),DRAM Accesses (MB) 2 | CONV1-1,247,76.2,38.0,0.26,156,1.6%,112.6,15.4 3 | CONV1-2,218,910.3,810.6,5.55,156,47.7%,2402.8,54.0 4 | CONV2-1,242,470.3,405.3,2.77,156,24.8%,1201.4,33.4 5 | CONV2-2,231,894.3,810.8,5.55,156,38.7%,2402.8,48.5 6 | CONV3-1,254,241.1,204.0,2.77,156,39.7%,607.4,20.2 7 | CONV3-2,235,460.9,408.1,5.55,156,58.1%,1214.8,32.2 8 | CONV3-3,233,457.7,408.1,5.55,156,58.7%,1214.8,30.8 9 | CONV4-1,278,135.8,105.1,2.77,168,64.3%,321.8,17.8 10 | CONV4-2,261,254.8,210.0,5.55,168,74.7%,643.7,28.6 11 
| CONV4-3,240,246.3,210.0,5.55,168,85.4%,643.7,22.8 12 | CONV5-1,258,54.3,48.3,1.39,163,79.4%,90.0,6.3 13 | CONV5-2,236,53.7,48.5,1.39,168,87.4%,90.0,5.7 14 | CONV5-3,230,53.7,48.5,1.39,168,88.5%,90.0,5.6 15 | Total,236,4309.5,3755.2,46.04,158,58.6%,11035.8,321.1 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2017 Adrian Sampson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /nets/Makefile: -------------------------------------------------------------------------------- 1 | # Network specifications from the Web. 
2 | 3 | NETS := VGG_CNN_M_deploy VGG_ILSVRC_16_layers_deploy alexnet_deploy \ 4 | faster_rcnn_vgg16 faster_rcnn_vggm 5 | 6 | .PHONY: all clean 7 | all: $(NETS:%=%.json) 8 | clean: 9 | rm -f $(NETS:%=%.prototxt) $(NETS:%=%.json) 10 | 11 | # Prototxt from the web. 12 | 13 | VGG_CNN_M_deploy.prototxt: 14 | curl -LO "https://gist.githubusercontent.com/ksimonyan/f194575702fae63b2829/raw/6516d9be2064680697ee6791d8de77cfc0dd0990/VGG_CNN_M_deploy.prototxt" 15 | 16 | VGG_ILSVRC_16_layers_deploy.prototxt: 17 | curl -LO "https://gist.githubusercontent.com/ksimonyan/211839e770f7b538e2d8/raw/ded9363bd93ec0c770134f4e387d8aaaaa2407ce/VGG_ILSVRC_16_layers_deploy.prototxt" 18 | 19 | alexnet_deploy.prototxt: 20 | curl -L -o $@ "https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_alexnet/deploy.prototxt" 21 | 22 | faster_rcnn_vgg16.prototxt: 23 | curl -L -o $@ "https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/models/pascal_voc/VGG16/faster_rcnn_end2end/test.prototxt" 24 | 25 | faster_rcnn_vggm.prototxt: 26 | curl -L -o $@ "https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/test.prototxt" 27 | 28 | # Conversion tool. 
29 | 30 | %.json: %.prototxt 31 | python2 extract.py $< > $@ 32 | -------------------------------------------------------------------------------- /raw/eyeriss-tableIV.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Technology 4 | 5 | TSMC 65nm LP IPQM 6 | 7 | 8 | 9 | Chip Size 10 | 11 | 4.0 mm x 4.0 mm 12 | 13 | 14 | 15 | Core Area 16 | 17 | 35 mm x 35 mm 18 | 19 | 20 | 21 | Gate Count (logic only) 22 | 23 | ll76k (2-inpm NAND) 24 | 25 | 26 | 27 | On-Chip SRAM 28 | 29 | 181.5K bytes 30 | 31 | 32 | 33 | Number of PEs 34 | 35 | 168 36 | 37 | 38 | 39 | Global Buffer 40 | 41 | IOS‘OK bytes (SRAM) 42 | 43 | 44 | 45 | Scratch Pads 46 | (per PE) 47 | 48 | finer wcigms: 44s bytcs (SRAM) 49 | feature maps: 24 bytes (Registers) 50 | parlial sums: 48 byles (Regislers) 51 | 52 | 53 | 54 | Supply Voltage 55 | 56 | core: 0,8271‘17 V 57 | 1/0: 1.8 V 58 | 59 | 60 | 61 | Clock Rate 62 | 63 | core: 1007250 MHZ 64 | link: up 10 90 MHz 65 | 66 | 67 | 68 | Peak Throughput 69 | 70 | 16‘842‘0 GMACS 71 | 72 | 73 | 74 | Arithmetic Precision 75 | 76 | 16-bit fixed-poinl 77 | 78 | 79 | 80 | 81 | 82 | Namely Supported 83 | CNN Shapes 84 | 85 | 86 | 87 | filler height (H): 1712 88 | 89 | filter width (5): 32 90 | 91 | num. of films (M): 71024 92 | um, of channels (C): 1—1024 93 | venical smde: 1,2,4 94 | horizomul S&ridc: Hz 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /raw/eyeriss-tableVI.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | Layer Power Tuml Pmcexcing Num. «r Num. of Zen: in Ghlml Bun. 
DRAM 36 | ‘ (mW) Latency (ms) Latency (ms) MACx Active PEx Imam (%) Acmm Accemx 37 | CONVIVI 247 76.2 38 0 0,260 156 (93%) 16% 112.5 MB 154 MB 38 | CONVl»2 218 910.3 810.6 5,556 156 (93%) 47.7% 2402.8 MB 54 0 MB 39 | CONVZVI 242 470.3 405.3 2.776 150 193%) 24.8% 1201.4 MB 334 MB 40 | CONv22 231 894.3 1110.3 5556 156 (93%) 38 7% 2402.3 MB 485 MB 41 | CONV3-1 254 241 1 204.0 2.776 156 (93%) 39.7% 607.4 MB 20 2 MB 42 | CONVBVZ 235 460.9 408.1 5.556 |56 (93%) 53 |% 1214.8 MB 322 MB 43 | CONV373 233 457.7 408.1 5,556 156 (93%) 58.7% |2l4.8 MB 308 MB 44 | CoNv4-1 278 135.3 105.1 2.770 168 (100%) (74.3% 321.3 MB 17 8 MB 45 | CONV4.2 261 254 8 210 0 5 556 168 (100%) 74 7% 643 7 MB 20.6 MB 46 | CONV473 240 246.3 210.0 SSSG 168 (100%) 85.4% 643.7 MB 228 MB 47 | CONVS-l 258 54.3 48.3 1.390 163 (100%) 79.4% 90.0 MB 6 3 MB 48 | CONVSVZ 236 . .7 48 5 1,390 168 (100%) 87 4% 90 0 MB 57 MB 49 | CONV5»3 230 53.7 48.5 1,396 168 (100%) 88.5% 90.0 MB 5 6 ME 50 | Tubal 236 4309.5 3755.2 46.040 158 (94%} 58.6% [1035.8 MB 321.1 MB 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /nets/alexnet_deploy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "data", 4 | "type": "Input" 5 | }, 6 | { 7 | "macs": 105415200, 8 | "name": "conv1", 9 | "type": "Convolution" 10 | }, 11 | { 12 | "name": "relu1", 13 | "type": "ReLU" 14 | }, 15 | { 16 | "name": "norm1", 17 | "type": "LRN" 18 | }, 19 | { 20 | "name": "pool1", 21 | "type": "Pooling" 22 | }, 23 | { 24 | "macs": 223948800, 25 | "name": "conv2", 26 | "type": "Convolution" 27 | }, 28 | { 29 | "name": "relu2", 30 | "type": "ReLU" 31 | }, 32 | { 33 | "name": "norm2", 34 | "type": "LRN" 35 | }, 36 | { 37 | "name": "pool2", 38 | "type": "Pooling" 39 | }, 40 | { 41 | "macs": 149520384, 42 | "name": "conv3", 43 | "type": "Convolution" 44 | }, 45 | { 46 | 
"name": "relu3", 47 | "type": "ReLU" 48 | }, 49 | { 50 | "macs": 112140288, 51 | "name": "conv4", 52 | "type": "Convolution" 53 | }, 54 | { 55 | "name": "relu4", 56 | "type": "ReLU" 57 | }, 58 | { 59 | "macs": 74760192, 60 | "name": "conv5", 61 | "type": "Convolution" 62 | }, 63 | { 64 | "name": "relu5", 65 | "type": "ReLU" 66 | }, 67 | { 68 | "name": "pool5", 69 | "type": "Pooling" 70 | }, 71 | { 72 | "macs": 37748736, 73 | "name": "fc6", 74 | "type": "InnerProduct" 75 | }, 76 | { 77 | "name": "relu6", 78 | "type": "ReLU" 79 | }, 80 | { 81 | "name": "drop6", 82 | "type": "Dropout" 83 | }, 84 | { 85 | "macs": 16777216, 86 | "name": "fc7", 87 | "type": "InnerProduct" 88 | }, 89 | { 90 | "name": "relu7", 91 | "type": "ReLU" 92 | }, 93 | { 94 | "name": "drop7", 95 | "type": "Dropout" 96 | }, 97 | { 98 | "macs": 4096000, 99 | "name": "fc8", 100 | "type": "InnerProduct" 101 | }, 102 | { 103 | "name": "prob", 104 | "type": "Softmax" 105 | } 106 | ] 107 | -------------------------------------------------------------------------------- /nets/VGG_CNN_M_deploy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "input", 4 | "type": "Input" 5 | }, 6 | { 7 | "macs": 167664672, 8 | "name": "conv1", 9 | "type": "Convolution" 10 | }, 11 | { 12 | "name": "relu1", 13 | "type": "ReLU" 14 | }, 15 | { 16 | "name": "norm1", 17 | "type": "LRN" 18 | }, 19 | { 20 | "name": "pool1", 21 | "type": "Pooling" 22 | }, 23 | { 24 | "macs": 415334400, 25 | "name": "conv2", 26 | "type": "Convolution" 27 | }, 28 | { 29 | "name": "relu2", 30 | "type": "ReLU" 31 | }, 32 | { 33 | "name": "norm2", 34 | "type": "LRN" 35 | }, 36 | { 37 | "name": "pool2", 38 | "type": "Pooling" 39 | }, 40 | { 41 | "macs": 199360512, 42 | "name": "conv3", 43 | "type": "Convolution" 44 | }, 45 | { 46 | "name": "relu3", 47 | "type": "ReLU" 48 | }, 49 | { 50 | "macs": 398721024, 51 | "name": "conv4", 52 | "type": "Convolution" 53 | }, 54 | { 55 | "name": "relu4", 56 | 
"type": "ReLU" 57 | }, 58 | { 59 | "macs": 398721024, 60 | "name": "conv5", 61 | "type": "Convolution" 62 | }, 63 | { 64 | "name": "relu5", 65 | "type": "ReLU" 66 | }, 67 | { 68 | "name": "pool5", 69 | "type": "Pooling" 70 | }, 71 | { 72 | "macs": 75497472, 73 | "name": "fc6", 74 | "type": "InnerProduct" 75 | }, 76 | { 77 | "name": "relu6", 78 | "type": "ReLU" 79 | }, 80 | { 81 | "name": "drop6", 82 | "type": "Dropout" 83 | }, 84 | { 85 | "macs": 16777216, 86 | "name": "fc7", 87 | "type": "InnerProduct" 88 | }, 89 | { 90 | "name": "relu7", 91 | "type": "ReLU" 92 | }, 93 | { 94 | "name": "drop7", 95 | "type": "Dropout" 96 | }, 97 | { 98 | "macs": 4096000, 99 | "name": "fc8", 100 | "type": "InnerProduct" 101 | }, 102 | { 103 | "name": "prob", 104 | "type": "Softmax" 105 | } 106 | ] 107 | -------------------------------------------------------------------------------- /raw/eie-raw.csv: -------------------------------------------------------------------------------- 1 | "",Power (%)(mW),(%),Area (μm2),, 2 | Total,9.157,,"638,024",, 3 | memory,5.416,(59.15%),"594,786",,(93.22%) 4 | clock network,1.874,(20.46%),866,,(0.14%) 5 | register,1.026,(11.20%),"9,465",,(1.48%) 6 | combinational,0.841,(9.18%),"8,946",,(1.40%) 7 | filler cell,,,"23,961",,(3.76%) 8 | Act queue,0.112,(1.23%),758,,(0.12%) 9 | PtrRead,1.807,(19.73%),"121,849",,(19.10%) 10 | SpmatRead,4.955,(54.11%),"469,412",,(73.57%) 11 | ArithmUnit,1.162,(12.68%),"3,110",,(0.49%) 12 | ActRW,1.122,(12.25%),"18,934",,(2.97%) 13 | filler cell,,,"23,961",,(3.76%) 14 | NT-Platform Batch Matrix,AlexNet,VGG16,, 15 | Size Type,FC6 FC7,FC8 FC6 FC7,FC8 We Wd,LSTM 16 | 470.5CPU 1 dense,7516.2 6187.1,1134.9 35022.8 5372.8,774.2 605.0 1361.4, 17 | 260.0(Core sparse,3066.5 1282.1,890.5 3774.3 545.1,777.3 261.2 437.4, 18 | 28.8i7-5930k) 64 dense,318.4 188.9,45.8 1056.0 188.3,45.7 28.7 69.0, 19 | sparse,1417.6 682.1,407.7 1780.3 274.9,363.1 117.7 176.4,107.4 20 | 51.91GPU dense sparse,541.5 134.8 243.0 65.8,80.5 54.6 1467.8 167.0 
243.0 39.8,80.5 48.0 65 17.7 90.1 41.1,18.5 21 | "(Titan X) 2.564 dense",19.8 8.9,5.9 53.6 8.9,5.9 3.2 2.3, 22 | sparse,94.6 51.5,23.2 121.5 24.4,22.0 10.9 11.0,9.0 23 | 956.91mGPU dense sparse,12437.2 2879.3 5765.0 1256.5,2252.1 837.0 35427.0 4377.2 5544.3 626.3,2243.1 745.1 1316 240.6 2565.5 570.6,315 24 | "(Tegra K1) 95.264 dense",1663.6 2056.8,298.0 2001.4 2050.7,483.9 87.8 956.3, 25 | sparse,4003.9 1372.8,576.7 8024.8 660.2,544.1 236.3 187.7,186.5 26 | EIE Theoretical Actual Time Time,28.1 30.3 11.7 12.2,8.9 9.9 28.1 34.4 7.9 8.7,7.3 8.4 5.2 8.0 13.0 13.9,6.57.5 27 | Platform,Core-i7,GeForce,Tegra,A-Eye,Da-,True-,EIE,EIE 28 | "",5930K,Titan X,K1,[14],DianNao,North,"(ours,","(28nm," 29 | "",,,,,[11],[40],64PE),256PE) 30 | Year,2014,2015,2014,2015,2014,2014,2016,2016 31 | Platform Type,CPU,GPU,mGPU,FPGA,ASIC,ASIC,ASIC,ASIC 32 | Technology,22nm,28nm,28nm,28nm,28nm,28nm,45nm,28nm 33 | Clock (MHz),3500,1075,852,150,606,Async,800,1200 34 | Memory type,DRAM,DRAM,DRAM,DRAM,eDRAM,SRAM,SRAM,SRAM 35 | Max DNN model size (#Params),<16G,<3G,<500M,<500M,18M,256M,84M,336M 36 | Quantization Stategy,32-bit,32-bit,32-bit,16-bit,16-bit,1-bit,4-bit,4-bit 37 | "",float,float,float,fixed,fixed,fixed,fixed,fixed 38 | Area (mm2),356,601,-,-,67.7,430,40.8,63.8 39 | Power (W),73,159,5.1,9.63,15.97,0.18,0.59,2.36 40 | M×V Throughput (Frames/s),162,"4,115",173,33,"147,938","1,989","81,967","426,230" 41 | Area Efficiency ( Frames/s/mm2),0.46,6.85,-,-,"2,185",4.63,"2,009","6,681" 42 | Energy Efficiency (Frames/J),2.22,25.9,33.9,3.43,"9,263","10,839","138,927","180,606" 43 | -------------------------------------------------------------------------------- /nets/extract.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | from __future__ import division, print_function 3 | 4 | import sys 5 | import caffe 6 | import json 7 | 8 | 9 | def _blob_and_weights(net, layer_name): 10 | """Get the activation blob and the weights blob for the 
named layer 11 | in the Caffe network. 12 | """ 13 | # Get the activation blob for this layer and its parameters 14 | # (weights). 15 | blob = net.blobs[net.top_names[layer_name][0]] 16 | weights = net.params[layer_name][0] 17 | return blob, weights 18 | 19 | 20 | def extract(model_fn): 21 | """Extract per-layer cost information from a Caffe model file, given 22 | as the path to a prototxt specification. 23 | 24 | Generate a sequence of dicts with each layer's name, type, and (for 25 | some kinds of layers) the total number of multiply--accumulate 26 | operations needed for a (forward) computation of the layer. 27 | """ 28 | # Load the model from the prototxt file. 29 | net = caffe.Net(model_fn, caffe.TEST) 30 | 31 | for name, layer in zip(net._layer_names, net.layers): 32 | layer_info = { 33 | 'name': name, 34 | 'type': layer.type, 35 | } 36 | 37 | # Convolutional layers. 38 | if layer.type in ('Convolution', 'Deconvolution'): 39 | blob, weights = _blob_and_weights(net, name) 40 | 41 | # Extract relevant hyperparameters from the layer's 42 | # activation and weight buffers. 43 | layer_height = blob.shape[2] 44 | layer_width = blob.shape[3] 45 | in_chan = weights.shape[0] 46 | out_chan = weights.shape[1] 47 | kernel_height = weights.shape[2] 48 | kernel_width = weights.shape[3] 49 | 50 | # Compute the total number of multiply--accumulate 51 | # operations for this convolutional layer. 52 | num_outputs = layer_width * layer_height * out_chan 53 | num_macs_per_out = in_chan * kernel_height * kernel_width 54 | num_macs = num_outputs * num_macs_per_out 55 | 56 | layer_info['macs'] = num_macs 57 | 58 | # Fully-connected layers. 59 | elif layer.type == "InnerProduct": 60 | blob, weights = _blob_and_weights(net, name) 61 | 62 | # There is one MAC per "synapse" (i.e., each pairing of an 63 | # input neuron with an output neuron). 
64 | num_output = weights.shape[0] 65 | num_input = weights.shape[1] 66 | num_macs = num_input * num_output 67 | 68 | layer_info['macs'] = num_macs 69 | 70 | yield layer_info 71 | 72 | 73 | if __name__ == '__main__': 74 | out = list(extract(sys.argv[1])) 75 | print(json.dumps(out, indent=2, sort_keys=True)) 76 | -------------------------------------------------------------------------------- /nets/faster_rcnn_vggm.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "input", 4 | "type": "Input" 5 | }, 6 | { 7 | "macs": 167664672, 8 | "name": "conv1", 9 | "type": "Convolution" 10 | }, 11 | { 12 | "name": "relu1", 13 | "type": "ReLU" 14 | }, 15 | { 16 | "name": "norm1", 17 | "type": "LRN" 18 | }, 19 | { 20 | "name": "pool1", 21 | "type": "Pooling" 22 | }, 23 | { 24 | "macs": 415334400, 25 | "name": "conv2", 26 | "type": "Convolution" 27 | }, 28 | { 29 | "name": "relu2", 30 | "type": "ReLU" 31 | }, 32 | { 33 | "name": "norm2", 34 | "type": "LRN" 35 | }, 36 | { 37 | "name": "pool2", 38 | "type": "Pooling" 39 | }, 40 | { 41 | "macs": 199360512, 42 | "name": "conv3", 43 | "type": "Convolution" 44 | }, 45 | { 46 | "name": "relu3", 47 | "type": "ReLU" 48 | }, 49 | { 50 | "macs": 398721024, 51 | "name": "conv4", 52 | "type": "Convolution" 53 | }, 54 | { 55 | "name": "relu4", 56 | "type": "ReLU" 57 | }, 58 | { 59 | "macs": 398721024, 60 | "name": "conv5", 61 | "type": "Convolution" 62 | }, 63 | { 64 | "name": "relu5", 65 | "type": "ReLU" 66 | }, 67 | { 68 | "name": "conv5_relu5_0_split", 69 | "type": "Split" 70 | }, 71 | { 72 | "macs": 199360512, 73 | "name": "rpn_conv/3x3", 74 | "type": "Convolution" 75 | }, 76 | { 77 | "name": "rpn_relu/3x3", 78 | "type": "ReLU" 79 | }, 80 | { 81 | "name": "rpn/output_rpn_relu/3x3_0_split", 82 | "type": "Split" 83 | }, 84 | { 85 | "macs": 778752, 86 | "name": "rpn_cls_score", 87 | "type": "Convolution" 88 | }, 89 | { 90 | "macs": 1557504, 91 | "name": "rpn_bbox_pred", 92 | "type": 
"Convolution" 93 | }, 94 | { 95 | "name": "rpn_cls_score_reshape", 96 | "type": "Reshape" 97 | }, 98 | { 99 | "name": "rpn_cls_prob", 100 | "type": "Softmax" 101 | }, 102 | { 103 | "name": "rpn_cls_prob_reshape", 104 | "type": "Reshape" 105 | }, 106 | { 107 | "name": "proposal", 108 | "type": "Python" 109 | }, 110 | { 111 | "name": "roi_pool5", 112 | "type": "ROIPooling" 113 | }, 114 | { 115 | "macs": 75497472, 116 | "name": "fc6", 117 | "type": "InnerProduct" 118 | }, 119 | { 120 | "name": "relu6", 121 | "type": "ReLU" 122 | }, 123 | { 124 | "name": "drop6", 125 | "type": "Dropout" 126 | }, 127 | { 128 | "macs": 4194304, 129 | "name": "fc7", 130 | "type": "InnerProduct" 131 | }, 132 | { 133 | "name": "relu7", 134 | "type": "ReLU" 135 | }, 136 | { 137 | "name": "drop7", 138 | "type": "Dropout" 139 | }, 140 | { 141 | "name": "fc7_drop7_0_split", 142 | "type": "Split" 143 | }, 144 | { 145 | "macs": 21504, 146 | "name": "cls_score", 147 | "type": "InnerProduct" 148 | }, 149 | { 150 | "macs": 86016, 151 | "name": "bbox_pred", 152 | "type": "InnerProduct" 153 | }, 154 | { 155 | "name": "cls_prob", 156 | "type": "Softmax" 157 | } 158 | ] 159 | -------------------------------------------------------------------------------- /nets/VGG_ILSVRC_16_layers_deploy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "input", 4 | "type": "Input" 5 | }, 6 | { 7 | "macs": 86704128, 8 | "name": "conv1_1", 9 | "type": "Convolution" 10 | }, 11 | { 12 | "name": "relu1_1", 13 | "type": "ReLU" 14 | }, 15 | { 16 | "macs": 1849688064, 17 | "name": "conv1_2", 18 | "type": "Convolution" 19 | }, 20 | { 21 | "name": "relu1_2", 22 | "type": "ReLU" 23 | }, 24 | { 25 | "name": "pool1", 26 | "type": "Pooling" 27 | }, 28 | { 29 | "macs": 924844032, 30 | "name": "conv2_1", 31 | "type": "Convolution" 32 | }, 33 | { 34 | "name": "relu2_1", 35 | "type": "ReLU" 36 | }, 37 | { 38 | "macs": 1849688064, 39 | "name": "conv2_2", 40 | "type": 
"Convolution" 41 | }, 42 | { 43 | "name": "relu2_2", 44 | "type": "ReLU" 45 | }, 46 | { 47 | "name": "pool2", 48 | "type": "Pooling" 49 | }, 50 | { 51 | "macs": 924844032, 52 | "name": "conv3_1", 53 | "type": "Convolution" 54 | }, 55 | { 56 | "name": "relu3_1", 57 | "type": "ReLU" 58 | }, 59 | { 60 | "macs": 1849688064, 61 | "name": "conv3_2", 62 | "type": "Convolution" 63 | }, 64 | { 65 | "name": "relu3_2", 66 | "type": "ReLU" 67 | }, 68 | { 69 | "macs": 1849688064, 70 | "name": "conv3_3", 71 | "type": "Convolution" 72 | }, 73 | { 74 | "name": "relu3_3", 75 | "type": "ReLU" 76 | }, 77 | { 78 | "name": "pool3", 79 | "type": "Pooling" 80 | }, 81 | { 82 | "macs": 924844032, 83 | "name": "conv4_1", 84 | "type": "Convolution" 85 | }, 86 | { 87 | "name": "relu4_1", 88 | "type": "ReLU" 89 | }, 90 | { 91 | "macs": 1849688064, 92 | "name": "conv4_2", 93 | "type": "Convolution" 94 | }, 95 | { 96 | "name": "relu4_2", 97 | "type": "ReLU" 98 | }, 99 | { 100 | "macs": 1849688064, 101 | "name": "conv4_3", 102 | "type": "Convolution" 103 | }, 104 | { 105 | "name": "relu4_3", 106 | "type": "ReLU" 107 | }, 108 | { 109 | "name": "pool4", 110 | "type": "Pooling" 111 | }, 112 | { 113 | "macs": 462422016, 114 | "name": "conv5_1", 115 | "type": "Convolution" 116 | }, 117 | { 118 | "name": "relu5_1", 119 | "type": "ReLU" 120 | }, 121 | { 122 | "macs": 462422016, 123 | "name": "conv5_2", 124 | "type": "Convolution" 125 | }, 126 | { 127 | "name": "relu5_2", 128 | "type": "ReLU" 129 | }, 130 | { 131 | "macs": 462422016, 132 | "name": "conv5_3", 133 | "type": "Convolution" 134 | }, 135 | { 136 | "name": "relu5_3", 137 | "type": "ReLU" 138 | }, 139 | { 140 | "name": "pool5", 141 | "type": "Pooling" 142 | }, 143 | { 144 | "macs": 102760448, 145 | "name": "fc6", 146 | "type": "InnerProduct" 147 | }, 148 | { 149 | "name": "relu6", 150 | "type": "ReLU" 151 | }, 152 | { 153 | "name": "drop6", 154 | "type": "Dropout" 155 | }, 156 | { 157 | "macs": 16777216, 158 | "name": "fc7", 159 | "type": 
"InnerProduct" 160 | }, 161 | { 162 | "name": "relu7", 163 | "type": "ReLU" 164 | }, 165 | { 166 | "name": "drop7", 167 | "type": "Dropout" 168 | }, 169 | { 170 | "macs": 4096000, 171 | "name": "fc8", 172 | "type": "InnerProduct" 173 | }, 174 | { 175 | "name": "prob", 176 | "type": "Softmax" 177 | } 178 | ] 179 | -------------------------------------------------------------------------------- /nets/faster_rcnn_vgg16.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "input", 4 | "type": "Input" 5 | }, 6 | { 7 | "macs": 86704128, 8 | "name": "conv1_1", 9 | "type": "Convolution" 10 | }, 11 | { 12 | "name": "relu1_1", 13 | "type": "ReLU" 14 | }, 15 | { 16 | "macs": 1849688064, 17 | "name": "conv1_2", 18 | "type": "Convolution" 19 | }, 20 | { 21 | "name": "relu1_2", 22 | "type": "ReLU" 23 | }, 24 | { 25 | "name": "pool1", 26 | "type": "Pooling" 27 | }, 28 | { 29 | "macs": 924844032, 30 | "name": "conv2_1", 31 | "type": "Convolution" 32 | }, 33 | { 34 | "name": "relu2_1", 35 | "type": "ReLU" 36 | }, 37 | { 38 | "macs": 1849688064, 39 | "name": "conv2_2", 40 | "type": "Convolution" 41 | }, 42 | { 43 | "name": "relu2_2", 44 | "type": "ReLU" 45 | }, 46 | { 47 | "name": "pool2", 48 | "type": "Pooling" 49 | }, 50 | { 51 | "macs": 924844032, 52 | "name": "conv3_1", 53 | "type": "Convolution" 54 | }, 55 | { 56 | "name": "relu3_1", 57 | "type": "ReLU" 58 | }, 59 | { 60 | "macs": 1849688064, 61 | "name": "conv3_2", 62 | "type": "Convolution" 63 | }, 64 | { 65 | "name": "relu3_2", 66 | "type": "ReLU" 67 | }, 68 | { 69 | "macs": 1849688064, 70 | "name": "conv3_3", 71 | "type": "Convolution" 72 | }, 73 | { 74 | "name": "relu3_3", 75 | "type": "ReLU" 76 | }, 77 | { 78 | "name": "pool3", 79 | "type": "Pooling" 80 | }, 81 | { 82 | "macs": 924844032, 83 | "name": "conv4_1", 84 | "type": "Convolution" 85 | }, 86 | { 87 | "name": "relu4_1", 88 | "type": "ReLU" 89 | }, 90 | { 91 | "macs": 1849688064, 92 | "name": "conv4_2", 93 | 
"type": "Convolution" 94 | }, 95 | { 96 | "name": "relu4_2", 97 | "type": "ReLU" 98 | }, 99 | { 100 | "macs": 1849688064, 101 | "name": "conv4_3", 102 | "type": "Convolution" 103 | }, 104 | { 105 | "name": "relu4_3", 106 | "type": "ReLU" 107 | }, 108 | { 109 | "name": "pool4", 110 | "type": "Pooling" 111 | }, 112 | { 113 | "macs": 462422016, 114 | "name": "conv5_1", 115 | "type": "Convolution" 116 | }, 117 | { 118 | "name": "relu5_1", 119 | "type": "ReLU" 120 | }, 121 | { 122 | "macs": 462422016, 123 | "name": "conv5_2", 124 | "type": "Convolution" 125 | }, 126 | { 127 | "name": "relu5_2", 128 | "type": "ReLU" 129 | }, 130 | { 131 | "macs": 462422016, 132 | "name": "conv5_3", 133 | "type": "Convolution" 134 | }, 135 | { 136 | "name": "relu5_3", 137 | "type": "ReLU" 138 | }, 139 | { 140 | "name": "conv5_3_relu5_3_0_split", 141 | "type": "Split" 142 | }, 143 | { 144 | "macs": 462422016, 145 | "name": "rpn_conv/3x3", 146 | "type": "Convolution" 147 | }, 148 | { 149 | "name": "rpn_relu/3x3", 150 | "type": "ReLU" 151 | }, 152 | { 153 | "name": "rpn/output_rpn_relu/3x3_0_split", 154 | "type": "Split" 155 | }, 156 | { 157 | "macs": 1806336, 158 | "name": "rpn_cls_score", 159 | "type": "Convolution" 160 | }, 161 | { 162 | "macs": 3612672, 163 | "name": "rpn_bbox_pred", 164 | "type": "Convolution" 165 | }, 166 | { 167 | "name": "rpn_cls_score_reshape", 168 | "type": "Reshape" 169 | }, 170 | { 171 | "name": "rpn_cls_prob", 172 | "type": "Softmax" 173 | }, 174 | { 175 | "name": "rpn_cls_prob_reshape", 176 | "type": "Reshape" 177 | }, 178 | { 179 | "name": "proposal", 180 | "type": "Python" 181 | }, 182 | { 183 | "name": "roi_pool5", 184 | "type": "ROIPooling" 185 | }, 186 | { 187 | "macs": 102760448, 188 | "name": "fc6", 189 | "type": "InnerProduct" 190 | }, 191 | { 192 | "name": "relu6", 193 | "type": "ReLU" 194 | }, 195 | { 196 | "name": "drop6", 197 | "type": "Dropout" 198 | }, 199 | { 200 | "macs": 16777216, 201 | "name": "fc7", 202 | "type": "InnerProduct" 203 | }, 204 | 
{ 205 | "name": "relu7", 206 | "type": "ReLU" 207 | }, 208 | { 209 | "name": "drop7", 210 | "type": "Dropout" 211 | }, 212 | { 213 | "name": "fc7_drop7_0_split", 214 | "type": "Split" 215 | }, 216 | { 217 | "macs": 86016, 218 | "name": "cls_score", 219 | "type": "InnerProduct" 220 | }, 221 | { 222 | "macs": 344064, 223 | "name": "bbox_pred", 224 | "type": "InnerProduct" 225 | }, 226 | { 227 | "name": "cls_prob", 228 | "type": "Softmax" 229 | } 230 | ] 231 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | First-Order Deep Learning Accelerator Model (FODLAM) 2 | ==================================================== 3 | 4 | FODLAM is a quick, easy model for the power and performance of modern hardware implementations of deep neural networks. It is based on published numbers from two papers: 5 | 6 | * ["EIE: Efficient Inference Engine on Compressed Deep Neural Network."](https://arxiv.org/pdf/1602.01528.pdf) 7 | Song Han, Xingyu Liu, Huizi Mao, Jing Pu, Ardavan Pedram, Mark A. Horowitz, and William J. Dally. 8 | In ISCA 2016. 9 | * ["Eyeriss: An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks."](http://www.rle.mit.edu/eems/wp-content/uploads/2016/04/eyeriss_isca_2016.pdf) 10 | Yu-Hsin Chen, Tushar Krishna, Joel S. Emer, and Vivienne Sze. 11 | In J. Solid-State Circuits, January 2017. 12 | 13 | EIE provides the fully-connected layers; Eyeriss provides the convolutional layers. FODLAM only supports these two kinds of layers. 14 | 15 | 16 | Running the Model 17 | ----------------- 18 | 19 | FODLAM is a Python 3 program. It has no other dependencies. 20 | 21 | To specify a DNN, create a JSON file containing two keys: 22 | 23 | * Choose one of these two options to select a network to draw layers from: 24 | * `net`: A built-in network name, either `"VGG16"` or `"AlexNet"`. 
FODLAM will use precise published numbers. 25 | * `netfile`: The name of a JSON file in the `nets/` directory that describes any CNN. FODLAM will approximate layer costs using scaling. 26 | * `layers`: A list of layer names to enable. 27 | 28 | You can see examples in `config/`. 29 | 30 | Run FODLAM by piping in a configuration file, like this: 31 | 32 | $ python3 fodlam.py < config/vgg16.json 33 | { 34 | "conv": { 35 | "energy": 1.0162585, 36 | "latency": 4.3094 37 | }, 38 | "fc": { 39 | "energy": 9.157180384087789e-05, 40 | "latency": 7.438888888888888e-05 41 | }, 42 | "total": { 43 | "energy": 1.016350071803841, 44 | "latency": 4.309474388888889 45 | } 46 | } 47 | 48 | The results are printed as JSON to stdout. The output consists of the total energy in joules and total latency in seconds. The output includes the total for the entire network, just the convolutional layers, and just the fully-connected layers. 49 | 50 | ### Providing a Network 51 | 52 | FODLAM ships with statistics for a few popular neural networks as JSON files under the `nets/` directory. Each JSON file describes the total computational cost of each layer in the network. 53 | 54 | To provide a new network specification, you need to produce a similar JSON file. FODLAM has a tool that can extract these statistics from Caffe models, but unlike FODLAM itself, this tool requires a working Caffe installation. (You can even use a funky hacked-up alternative version of Caffe, such as [the one for Fast and Faster R-CNN][caffe-fast-rcnn].) See the Makefile in that directory for tips on how to extract a JSON statistics file from your network specification. 55 | 56 | [caffe-fast-rcnn]: https://github.com/rbgirshick/caffe-fast-rcnn 57 | 58 | 59 | How it Works 60 | ------------ 61 | 62 | The model just totals up the latency and energy for each layer in a given configuration. Because both of the source papers measure AlexNet and VGG-16, layers from those networks are supported directly.
For other layers, FODLAM can scale the data from those networks. 63 | 64 | ### Process Normalization 65 | 66 | Because Eyeriss and EIE were evaluated on different process technologies, we have to scale one of them to model a single ASIC. Specifically, Eyeriss is on TSMC 65nm and EIE is on TSMC 45nm; we normalize to 65nm. This works by multiplying EIE time by the scaling factor and multiplying the power by the square of the scaling factor---i.e., Dennard scaling, which is admittedly retro. 67 | 68 | ### Power 69 | 70 | While the Eyeriss paper reports per-layer power, the EIE paper does not. Instead, this is how energy is computed (quoting from the paper): 71 | 72 | > Energy is obtained by multiplying computation time and total measured power... 73 | 74 | So the authors assume that power is constant across layers. FODLAM applies the same assumption to compute EIE layer energy. 75 | 76 | ### New Layers 77 | 78 | To estimate the costs for new layer configurations not found in AlexNet or VGG-16, FODLAM can scale the numbers from those networks. Scaling works by getting the number of multiply--accumulate (MAC) operations required to compute each layer. We compute the average cost per MAC among layers of the same type and use that to estimate the cost of a new layer. 79 | 80 | The assumption underlying this scaling technique is that the cost per MAC is close to constant across layers of varying shape. To validate this hypothesis, run FODLAM in diagnosis mode: 81 | 82 | $ python3 fodlam.py --diagnose 83 | 84 | FODLAM will print out the energy and latency per MAC for each layer. Notice that the cost per MAC is different for convolutional and fully-connected layers, but it varies by less than an order of magnitude within each layer type. 85 | 86 | 87 | Data Extraction 88 | --------------- 89 | 90 | To make FODLAM, I extracted raw data from tables in the papers. The raw text files from this extraction are in `raw/`. 
91 | 92 | * For EIE, I first used [Tabula][] to extract unstructured CSV data. I extracted tables II, IV, and V. (Table III was not referenced in the text; it just seems to characterize the benchmarks.) 93 | * In the Eyeriss journal paper, the PDF does not have text embedded for the tables. I extracted images of tables III through VI and OCR'd them with [Tesseract][]. There were a lot of errors. 94 | 95 | I then cleaned up the relevant data by hand. The cleaned-up CSVs that FODLAM uses are in `data/`. 96 | 97 | [tabula]: http://tabula.technology 98 | [tesseract]: https://github.com/tesseract-ocr/tesseract 99 | 100 | 101 | Credits 102 | ------- 103 | 104 | This is a research artifact from [Capra][] at Cornell. The license is [MIT][]. If you use FODLAM in a research paper, please cite it: 105 | 106 | @misc{fodlam, 107 | title={{FODLAM}, a first-order deep learning accelerator model}, 108 | author={Adrian Sampson and Mark Buckler}, 109 | note={\url{https://github.com/cucapra/fodlam}. Commit XXX.}, 110 | } 111 | 112 | You can replace that XXX with the Git commit hash for the version of FODLAM you used to help others reproduce your work. 113 | 114 | [capra]: https://capra.cs.cornell.edu 115 | [mit]: https://opensource.org/licenses/MIT 116 | -------------------------------------------------------------------------------- /fodlam.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import division, print_function 3 | 4 | import os 5 | import csv 6 | import json 7 | import sys 8 | from collections import namedtuple 9 | 10 | # The root directory where our data files are. 11 | BASE_DIR = os.path.dirname(__file__) 12 | 13 | # The networks that our accelerators have measurements for. 14 | NETWORKS = ('VGG16', 'AlexNet') 15 | LAYER_KINDS = ('conv', 'fc') 16 | 17 | # Accelerator data files. 
DATA_DIR = os.path.join(BASE_DIR, 'data')
EIE_FILE = 'eie-layers.csv'
EYERISS_FILES = {
    'VGG16': 'eyeriss-vgg16.csv',
    'AlexNet': 'eyeriss-alexnet.csv',
}

# EIE reports latencies in microseconds; Eyeriss in milliseconds. Eyeriss
# reports per-layer power in milliwatts. These factors convert the raw
# table values to SI base units (seconds and watts).
EIE_TIME_UNIT = 10 ** (-6)
EYERISS_TIME_UNIT = 10 ** (-3)
EYERISS_POWER_UNIT = 10 ** (-3)

# Process nodes for published implementations. Both use TSMC processes.
EIE_PROCESS_NM = 45
EYERISS_PROCESS_NM = 65

# EIE reports only a total design power (in watts), not per-layer power.
EIE_POWER = 0.59

# Data files with neural network statistics.
NETS_DIR = os.path.join(BASE_DIR, 'nets')
NET_FILES = {
    'VGG16': 'VGG_ILSVRC_16_layers_deploy.json',
    'AlexNet': 'alexnet_deploy.json',
}

# Caffe's names for the layer kinds FODLAM can model.
CAFFE_KINDS = {
    "InnerProduct": "fc",
    "Convolution": "conv",
    "Deconvolution": "conv",
}

# Two kinds of layer specs. Lookup layers are precise; we just need to
# look up their costs from the base data. Scale layers are
# approximations; we need to use the average cost per MAC.
LookupLayer = namedtuple('LookupLayer', ['net', 'layer'])
ScaleLayer = namedtuple('ScaleLayer', ['kind', 'macs'])


def load_hw_data():
    """Load the published numbers from our data files.

    Return a dict with base values reflecting EIE and Eyeriss layer
    costs: the 'eie' value maps (network, layer) pairs to latencies in
    seconds; the 'eyeriss' value holds per-layer total latency,
    processing latency (both in seconds), and power (in watts) mappings.
    """
    # Load EIE data (latency only; the EIE paper reports no
    # per-layer power).
    eie_latencies = {}
    with open(os.path.join(DATA_DIR, EIE_FILE)) as f:
        reader = csv.DictReader(f)
        for row in reader:
            if row['Layer'] == 'Actual Time':
                for k, v in row.items():
                    # The table has the network and the layer name
                    # together in one cell (e.g. "VGG16 FC6").
                    if ' ' in k:
                        network, layer = k.split()
                        if network in NETWORKS:
                            eie_latencies[network, layer] = \
                                float(v) * EIE_TIME_UNIT

    # Load Eyeriss data (latency and energy).
    eyeriss = {
        'latency_total': {},
        'latency_proc': {},
        'power': {},
    }
    for network in NETWORKS:
        with open(os.path.join(DATA_DIR, EYERISS_FILES[network])) as f:
            reader = csv.DictReader(f)
            for row in reader:
                layer = row['Layer']
                # Skip the summary row; we only want per-layer numbers.
                if layer == 'Total':
                    continue
                eyeriss['latency_total'][network, layer] = \
                    float(row['Total Latency (ms)']) * EYERISS_TIME_UNIT
                eyeriss['latency_proc'][network, layer] = \
                    float(row['Processing Latency (ms)']) * EYERISS_TIME_UNIT
                eyeriss['power'][network, layer] = \
                    float(row['Power (mW)']) * EYERISS_POWER_UNIT

    return { 'eie': eie_latencies, 'eyeriss': eyeriss }


def layer_costs(published):
    """Get the latencies (in seconds) and power (in watts) for *all*
    layers in the measured networks by combining EIE and Eyeriss data.

    Return a (latency, power) pair of mappings keyed by
    (network, layer).
    """
    eie_lat = published['eie']
    eyeriss_lat = published['eyeriss']['latency_total']
    eyeriss_pow = published['eyeriss']['power']

    # Process scaling factor between Eyeriss and EIE. We scale the EIE
    # numbers because the magnitudes for Eyeriss are more significant
    # and the paper has a more complete evaluation.
    proc_scale = EYERISS_PROCESS_NM / EIE_PROCESS_NM
    # Dennard scaling: time scales linearly with feature size; power
    # scales with its square.
    eie_lat_scaled = { k: v * proc_scale for k, v in eie_lat.items() }
    eie_power_scaled = EIE_POWER * (proc_scale ** 2)

    # Combine the latencies for all the layers.
    latency = dict(eie_lat_scaled)
    latency.update(eyeriss_lat)

    # For Eyeriss, we have per-layer power numbers. For EIE, from the paper:
    # "Energy is obtained by multiplying computation time and total measured
    # power". So we follow their lead and assume constant power.
    power = { k: eie_power_scaled for k in eie_lat }
    power.update(eyeriss_pow)

    return latency, power


def norm_layer_name(name):
    """Some heuristics to normalize a layer name from multiple sources.

    For example, some depictions of VGG-16 use upper case; others
    use lower case. Some use hyphens; others use underscores. These
    heuristics are by no means complete, but they increase the
    likelihood that layer names from multiple sources will align.
    """
    return name.upper().replace('_', '-')


def load_net(filename):
    """Load layer statistics for a single network from a JSON file.
    Return a mapping from normalized layer names to ScaleLayer tuples.

    Layers without a 'macs' entry (ReLU, pooling, etc.) carry no
    modeled cost and are omitted.
    """
    with open(os.path.join(NETS_DIR, filename)) as f:
        layers = json.load(f)

    # Flatten the list of layer statistics dictionaries into a
    # name-to-number mapping.
    out = {}
    for layer in layers:
        if 'macs' in layer:
            name = norm_layer_name(layer['name'])
            kind = CAFFE_KINDS[layer['type']]
            out[name] = ScaleLayer(kind, layer['macs'])
    return out


def load_net_data():
    """Load statistics about the neural networks from our description
    files. Return a mapping from network names to mappings from layer
    names to ScaleLayers.
    """
    return { network: load_net(filename)
             for network, filename in NET_FILES.items() }


def scaling_ratios(net_data, costs):
    """Get the scaling ratio---the cost per MAC---for convolutional and
    fully-connected layers with the given cost set.
    """
    # Total numerators and denominators.
    totals = {
        'conv': { 'cost': 0, 'macs': 0 },
        'fc': { 'cost': 0, 'macs': 0 },
    }

    # Sum up the cost and MAC counts for each layer type.
    for net, layer_stats in net_data.items():
        for layer, stats in layer_stats.items():
            cost = costs[net, layer]
            totals[stats.kind]['macs'] += stats.macs
            totals[stats.kind]['cost'] += cost

    # Return ratios.
    return { k: v['cost'] / v['macs'] for k, v in totals.items() }


def dict_product(a, b):
    """Pointwise-multiply the values in two dicts with identical sets of
    keys.
    """
    assert set(a.keys()) == set(b.keys())
    return { k: v * b[k] for k, v in a.items() }


def load_config(config_file):
    """Load a neural network configuration from a file-like object.

    Return a list of enabled layers, which are instances of either
    `LookupLayer` or `ScaleLayer`. Raise ValueError if the
    configuration selects neither a built-in network nor a net file.
    """
    config = json.load(config_file)
    if "net" in config:
        # A "built-in" (precise) network.
        return [LookupLayer(config["net"], norm_layer_name(l))
                for l in config['layers']]

    elif "netfile" in config:
        # A "new" (scaled) network. Load the statistics for this network
        # from its file.
        net_stats = load_net(config["netfile"])
        return [net_stats[norm_layer_name(l)] for l in config['layers']]

    else:
        raise ValueError('configuration must contain "net" or "netfile"')


def load_params():
    """Load and set up all the parameters for the model.

    Return the latency and energy cost mappings and the network shape
    statistics.
    """
    # Load the hardware cost data.
    published_data = load_hw_data()
    latency, power = layer_costs(published_data)
    energy = dict_product(latency, power)

    # Load the network information.
    net_data = load_net_data()

    return latency, energy, net_data


def layer_kind(name):
    """Return a short string indicating the kind of the named layer.

    Raise ValueError for a name that is neither convolutional ('CONV*')
    nor fully connected ('FC*').
    """
    if name.startswith('CONV'):
        return 'conv'
    elif name.startswith('FC'):
        return 'fc'
    raise ValueError('unknown layer kind for {}'.format(name))


def model(config_file):
    """Run the model for a configuration given in the specified file.

    Return a dict with 'conv', 'fc', and 'total' keys, each mapping to
    an energy (joules) and latency (seconds) pair.
    """
    latency, energy, net_data = load_params()
    latency_ratios = scaling_ratios(net_data, latency)
    energy_ratios = scaling_ratios(net_data, energy)

    # Load the configuration we're modeling.
    layers = load_config(config_file)

    # Initialize accumulators.
    totals = {}
    for kind in LAYER_KINDS:
        totals[kind] = { 'energy': 0.0, 'latency': 0.0 }

    # Add the cost for each layer.
    for layer in layers:
        if isinstance(layer, LookupLayer):
            # Use the built-in numbers for this layer.
            kind = layer_kind(layer.layer)
            totals[kind]['energy'] += energy[layer]
            totals[kind]['latency'] += latency[layer]

        elif isinstance(layer, ScaleLayer):
            # Scale the average costs. Use distinct local names so we
            # don't clobber the `energy` and `latency` cost mappings
            # loaded above.
            layer_energy = energy_ratios[layer.kind] * layer.macs
            layer_latency = latency_ratios[layer.kind] * layer.macs
            totals[layer.kind]['energy'] += layer_energy
            totals[layer.kind]['latency'] += layer_latency

    # Grand totals.
    totals['total'] = {
        'energy': sum(totals[k]['energy'] for k in LAYER_KINDS),
        'latency': sum(totals[k]['latency'] for k in LAYER_KINDS),
    }

    return totals


def diagnose_scaled_cost(net_data, costs):
    """Get information for diagnosing FODLAM's scaling logic for a
    particular cost dimension.

    For the given cost mapping, return the cost per MAC of each layer
    for each network.
    """
    out = {}
    for net, layer_stats in net_data.items():
        net_costs = {}
        for layer, stats in layer_stats.items():
            cost = costs[net, layer]
            # `stats` is a ScaleLayer; divide by its MAC count.
            net_costs[layer] = cost / stats.macs
        out[net] = net_costs
    return out


def diagnose_scaling():
    """Get per-MAC costs for the latency and energy of each layer and
    overall averages.
    """
    latency, energy, net_data = load_params()
    return {
        'per_layer': {
            'latency': diagnose_scaled_cost(net_data, latency),
            'energy': diagnose_scaled_cost(net_data, energy),
        },
        'average': {
            'latency': scaling_ratios(net_data, latency),
            'energy': scaling_ratios(net_data, energy),
        },
    }


if __name__ == '__main__':
    if sys.argv[1:] and sys.argv[1] == '--diagnose':
        out = diagnose_scaling()
    else:
        out = model(sys.stdin)
    print(json.dumps(out, sort_keys=True, indent=2))