├── README.md
└── layers.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # layers
2 | layers
3 |
Ahoy 👋, I'm Ehsan Paydar
4 | A passionate engineering master's student in artificial intelligence, originally from Iran
5 |
6 | - 🌱 I’m currently learning about other kinds of AI models
7 |
8 | - 👨💻 All of my projects are available at [info@epsoft.ir](mailto:info@epsoft.ir)
9 |
10 | - 📝 I regularly write articles on [epsoft98@gmail.com](mailto:epsoft98@gmail.com)
11 |
12 | - 📫 How to reach me **epsoft98@gmail.com**
13 |
14 | Connect with me:
15 |
16 |
17 |
18 |
19 | Languages and Tools:
20 |
21 |
--------------------------------------------------------------------------------
/layers.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 102,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import tensorflow as tf\n",
10 | "from tensorflow.keras.layers import MaxPooling2D,Dense,Flatten,Conv2D,GlobalMaxPooling1D \n",
11 | "from tensorflow.keras.models import Sequential, Model\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "## نگاهی عمیق تر به لایه ها\n"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "#### دسترسی به وزن های لایه ها\n"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 57,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "name": "stdout",
35 | "output_type": "stream",
36 | "text": [
37 | "Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf.h5\n",
38 | "17227776/17225924 [==============================] - 3s 0us/step\n"
39 | ]
40 | }
41 | ],
42 | "source": [
43 | "model = tf.keras.applications.MobileNet(weights= 'imagenet',\n",
44 | " include_top = True)"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 58,
50 | "metadata": {},
51 | "outputs": [
52 | {
53 | "name": "stdout",
54 | "output_type": "stream",
55 | "text": [
56 | "Model: \"mobilenet_1.00_224\"\n",
57 | "_________________________________________________________________\n",
58 | "Layer (type) Output Shape Param # \n",
59 | "=================================================================\n",
60 | "input_8 (InputLayer) [(None, 224, 224, 3)] 0 \n",
61 | "_________________________________________________________________\n",
62 | "conv1_pad (ZeroPadding2D) (None, 225, 225, 3) 0 \n",
63 | "_________________________________________________________________\n",
64 | "conv1 (Conv2D) (None, 112, 112, 32) 864 \n",
65 | "_________________________________________________________________\n",
66 | "conv1_bn (BatchNormalization (None, 112, 112, 32) 128 \n",
67 | "_________________________________________________________________\n",
68 | "conv1_relu (ReLU) (None, 112, 112, 32) 0 \n",
69 | "_________________________________________________________________\n",
70 | "conv_dw_1 (DepthwiseConv2D) (None, 112, 112, 32) 288 \n",
71 | "_________________________________________________________________\n",
72 | "conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32) 128 \n",
73 | "_________________________________________________________________\n",
74 | "conv_dw_1_relu (ReLU) (None, 112, 112, 32) 0 \n",
75 | "_________________________________________________________________\n",
76 | "conv_pw_1 (Conv2D) (None, 112, 112, 64) 2048 \n",
77 | "_________________________________________________________________\n",
78 | "conv_pw_1_bn (BatchNormaliza (None, 112, 112, 64) 256 \n",
79 | "_________________________________________________________________\n",
80 | "conv_pw_1_relu (ReLU) (None, 112, 112, 64) 0 \n",
81 | "_________________________________________________________________\n",
82 | "conv_pad_2 (ZeroPadding2D) (None, 113, 113, 64) 0 \n",
83 | "_________________________________________________________________\n",
84 | "conv_dw_2 (DepthwiseConv2D) (None, 56, 56, 64) 576 \n",
85 | "_________________________________________________________________\n",
86 | "conv_dw_2_bn (BatchNormaliza (None, 56, 56, 64) 256 \n",
87 | "_________________________________________________________________\n",
88 | "conv_dw_2_relu (ReLU) (None, 56, 56, 64) 0 \n",
89 | "_________________________________________________________________\n",
90 | "conv_pw_2 (Conv2D) (None, 56, 56, 128) 8192 \n",
91 | "_________________________________________________________________\n",
92 | "conv_pw_2_bn (BatchNormaliza (None, 56, 56, 128) 512 \n",
93 | "_________________________________________________________________\n",
94 | "conv_pw_2_relu (ReLU) (None, 56, 56, 128) 0 \n",
95 | "_________________________________________________________________\n",
96 | "conv_dw_3 (DepthwiseConv2D) (None, 56, 56, 128) 1152 \n",
97 | "_________________________________________________________________\n",
98 | "conv_dw_3_bn (BatchNormaliza (None, 56, 56, 128) 512 \n",
99 | "_________________________________________________________________\n",
100 | "conv_dw_3_relu (ReLU) (None, 56, 56, 128) 0 \n",
101 | "_________________________________________________________________\n",
102 | "conv_pw_3 (Conv2D) (None, 56, 56, 128) 16384 \n",
103 | "_________________________________________________________________\n",
104 | "conv_pw_3_bn (BatchNormaliza (None, 56, 56, 128) 512 \n",
105 | "_________________________________________________________________\n",
106 | "conv_pw_3_relu (ReLU) (None, 56, 56, 128) 0 \n",
107 | "_________________________________________________________________\n",
108 | "conv_pad_4 (ZeroPadding2D) (None, 57, 57, 128) 0 \n",
109 | "_________________________________________________________________\n",
110 | "conv_dw_4 (DepthwiseConv2D) (None, 28, 28, 128) 1152 \n",
111 | "_________________________________________________________________\n",
112 | "conv_dw_4_bn (BatchNormaliza (None, 28, 28, 128) 512 \n",
113 | "_________________________________________________________________\n",
114 | "conv_dw_4_relu (ReLU) (None, 28, 28, 128) 0 \n",
115 | "_________________________________________________________________\n",
116 | "conv_pw_4 (Conv2D) (None, 28, 28, 256) 32768 \n",
117 | "_________________________________________________________________\n",
118 | "conv_pw_4_bn (BatchNormaliza (None, 28, 28, 256) 1024 \n",
119 | "_________________________________________________________________\n",
120 | "conv_pw_4_relu (ReLU) (None, 28, 28, 256) 0 \n",
121 | "_________________________________________________________________\n",
122 | "conv_dw_5 (DepthwiseConv2D) (None, 28, 28, 256) 2304 \n",
123 | "_________________________________________________________________\n",
124 | "conv_dw_5_bn (BatchNormaliza (None, 28, 28, 256) 1024 \n",
125 | "_________________________________________________________________\n",
126 | "conv_dw_5_relu (ReLU) (None, 28, 28, 256) 0 \n",
127 | "_________________________________________________________________\n",
128 | "conv_pw_5 (Conv2D) (None, 28, 28, 256) 65536 \n",
129 | "_________________________________________________________________\n",
130 | "conv_pw_5_bn (BatchNormaliza (None, 28, 28, 256) 1024 \n",
131 | "_________________________________________________________________\n",
132 | "conv_pw_5_relu (ReLU) (None, 28, 28, 256) 0 \n",
133 | "_________________________________________________________________\n",
134 | "conv_pad_6 (ZeroPadding2D) (None, 29, 29, 256) 0 \n",
135 | "_________________________________________________________________\n",
136 | "conv_dw_6 (DepthwiseConv2D) (None, 14, 14, 256) 2304 \n",
137 | "_________________________________________________________________\n",
138 | "conv_dw_6_bn (BatchNormaliza (None, 14, 14, 256) 1024 \n",
139 | "_________________________________________________________________\n",
140 | "conv_dw_6_relu (ReLU) (None, 14, 14, 256) 0 \n",
141 | "_________________________________________________________________\n",
142 | "conv_pw_6 (Conv2D) (None, 14, 14, 512) 131072 \n",
143 | "_________________________________________________________________\n",
144 | "conv_pw_6_bn (BatchNormaliza (None, 14, 14, 512) 2048 \n",
145 | "_________________________________________________________________\n",
146 | "conv_pw_6_relu (ReLU) (None, 14, 14, 512) 0 \n",
147 | "_________________________________________________________________\n",
148 | "conv_dw_7 (DepthwiseConv2D) (None, 14, 14, 512) 4608 \n",
149 | "_________________________________________________________________\n",
150 | "conv_dw_7_bn (BatchNormaliza (None, 14, 14, 512) 2048 \n",
151 | "_________________________________________________________________\n",
152 | "conv_dw_7_relu (ReLU) (None, 14, 14, 512) 0 \n",
153 | "_________________________________________________________________\n",
154 | "conv_pw_7 (Conv2D) (None, 14, 14, 512) 262144 \n",
155 | "_________________________________________________________________\n",
156 | "conv_pw_7_bn (BatchNormaliza (None, 14, 14, 512) 2048 \n",
157 | "_________________________________________________________________\n",
158 | "conv_pw_7_relu (ReLU) (None, 14, 14, 512) 0 \n",
159 | "_________________________________________________________________\n",
160 | "conv_dw_8 (DepthwiseConv2D) (None, 14, 14, 512) 4608 \n",
161 | "_________________________________________________________________\n",
162 | "conv_dw_8_bn (BatchNormaliza (None, 14, 14, 512) 2048 \n",
163 | "_________________________________________________________________\n",
164 | "conv_dw_8_relu (ReLU) (None, 14, 14, 512) 0 \n",
165 | "_________________________________________________________________\n",
166 | "conv_pw_8 (Conv2D) (None, 14, 14, 512) 262144 \n",
167 | "_________________________________________________________________\n",
168 | "conv_pw_8_bn (BatchNormaliza (None, 14, 14, 512) 2048 \n",
169 | "_________________________________________________________________\n",
170 | "conv_pw_8_relu (ReLU) (None, 14, 14, 512) 0 \n",
171 | "_________________________________________________________________\n",
172 | "conv_dw_9 (DepthwiseConv2D) (None, 14, 14, 512) 4608 \n",
173 | "_________________________________________________________________\n",
174 | "conv_dw_9_bn (BatchNormaliza (None, 14, 14, 512) 2048 \n",
175 | "_________________________________________________________________\n",
176 | "conv_dw_9_relu (ReLU) (None, 14, 14, 512) 0 \n",
177 | "_________________________________________________________________\n",
178 | "conv_pw_9 (Conv2D) (None, 14, 14, 512) 262144 \n",
179 | "_________________________________________________________________\n",
180 | "conv_pw_9_bn (BatchNormaliza (None, 14, 14, 512) 2048 \n",
181 | "_________________________________________________________________\n",
182 | "conv_pw_9_relu (ReLU) (None, 14, 14, 512) 0 \n",
183 | "_________________________________________________________________\n",
184 | "conv_dw_10 (DepthwiseConv2D) (None, 14, 14, 512) 4608 \n",
185 | "_________________________________________________________________\n",
186 | "conv_dw_10_bn (BatchNormaliz (None, 14, 14, 512) 2048 \n",
187 | "_________________________________________________________________\n",
188 | "conv_dw_10_relu (ReLU) (None, 14, 14, 512) 0 \n",
189 | "_________________________________________________________________\n",
190 | "conv_pw_10 (Conv2D) (None, 14, 14, 512) 262144 \n",
191 | "_________________________________________________________________\n",
192 | "conv_pw_10_bn (BatchNormaliz (None, 14, 14, 512) 2048 \n",
193 | "_________________________________________________________________\n",
194 | "conv_pw_10_relu (ReLU) (None, 14, 14, 512) 0 \n",
195 | "_________________________________________________________________\n",
196 | "conv_dw_11 (DepthwiseConv2D) (None, 14, 14, 512) 4608 \n",
197 | "_________________________________________________________________\n",
198 | "conv_dw_11_bn (BatchNormaliz (None, 14, 14, 512) 2048 \n",
199 | "_________________________________________________________________\n",
200 | "conv_dw_11_relu (ReLU) (None, 14, 14, 512) 0 \n",
201 | "_________________________________________________________________\n",
202 | "conv_pw_11 (Conv2D) (None, 14, 14, 512) 262144 \n",
203 | "_________________________________________________________________\n",
204 | "conv_pw_11_bn (BatchNormaliz (None, 14, 14, 512) 2048 \n",
205 | "_________________________________________________________________\n",
206 | "conv_pw_11_relu (ReLU) (None, 14, 14, 512) 0 \n",
207 | "_________________________________________________________________\n",
208 | "conv_pad_12 (ZeroPadding2D) (None, 15, 15, 512) 0 \n",
209 | "_________________________________________________________________\n",
210 | "conv_dw_12 (DepthwiseConv2D) (None, 7, 7, 512) 4608 \n",
211 | "_________________________________________________________________\n",
212 | "conv_dw_12_bn (BatchNormaliz (None, 7, 7, 512) 2048 \n",
213 | "_________________________________________________________________\n",
214 | "conv_dw_12_relu (ReLU) (None, 7, 7, 512) 0 \n",
215 | "_________________________________________________________________\n",
216 | "conv_pw_12 (Conv2D) (None, 7, 7, 1024) 524288 \n",
217 | "_________________________________________________________________\n",
218 | "conv_pw_12_bn (BatchNormaliz (None, 7, 7, 1024) 4096 \n",
219 | "_________________________________________________________________\n",
220 | "conv_pw_12_relu (ReLU) (None, 7, 7, 1024) 0 \n",
221 | "_________________________________________________________________\n",
222 | "conv_dw_13 (DepthwiseConv2D) (None, 7, 7, 1024) 9216 \n",
223 | "_________________________________________________________________\n",
224 | "conv_dw_13_bn (BatchNormaliz (None, 7, 7, 1024) 4096 \n",
225 | "_________________________________________________________________\n",
226 | "conv_dw_13_relu (ReLU) (None, 7, 7, 1024) 0 \n",
227 | "_________________________________________________________________\n",
228 | "conv_pw_13 (Conv2D) (None, 7, 7, 1024) 1048576 \n",
229 | "_________________________________________________________________\n",
230 | "conv_pw_13_bn (BatchNormaliz (None, 7, 7, 1024) 4096 \n",
231 | "_________________________________________________________________\n",
232 | "conv_pw_13_relu (ReLU) (None, 7, 7, 1024) 0 \n",
233 | "_________________________________________________________________\n",
234 | "global_average_pooling2d (Gl (None, 1024) 0 \n",
235 | "_________________________________________________________________\n",
236 | "reshape_1 (Reshape) (None, 1, 1, 1024) 0 \n",
237 | "_________________________________________________________________\n",
238 | "dropout (Dropout) (None, 1, 1, 1024) 0 \n",
239 | "_________________________________________________________________\n",
240 | "conv_preds (Conv2D) (None, 1, 1, 1000) 1025000 \n",
241 | "_________________________________________________________________\n",
242 | "reshape_2 (Reshape) (None, 1000) 0 \n",
243 | "_________________________________________________________________\n",
244 | "predictions (Activation) (None, 1000) 0 \n",
245 | "=================================================================\n",
246 | "Total params: 4,253,864\n",
247 | "Trainable params: 4,231,976\n",
248 | "Non-trainable params: 21,888\n",
249 | "_________________________________________________________________\n"
250 | ]
251 | }
252 | ],
253 | "source": [
254 | "model.summary()"
255 | ]
256 | },
257 | {
258 | "cell_type": "markdown",
259 | "metadata": {},
260 | "source": [
261 | "اطلاعات مربوط به لایه های یک شبکه در مشخصه `layers` آن ذخیره شده است."
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 59,
267 | "metadata": {},
268 | "outputs": [
269 | {
270 | "data": {
271 | "text/plain": [
272 | "93"
273 | ]
274 | },
275 | "execution_count": 59,
276 | "metadata": {},
277 | "output_type": "execute_result"
278 | }
279 | ],
280 | "source": [
281 | "len(model.layers)"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": 60,
287 | "metadata": {},
288 | "outputs": [
289 | {
290 | "data": {
291 | "text/plain": [
292 | ""
293 | ]
294 | },
295 | "execution_count": 60,
296 | "metadata": {},
297 | "output_type": "execute_result"
298 | }
299 | ],
300 | "source": [
301 | "model.layers[2]"
302 | ]
303 | },
304 | {
305 | "cell_type": "markdown",
306 | "metadata": {},
307 | "source": [
308 | " برای این که به وزن های یک لایه دسترسی پیدا کنیم (اگر آن لایه وزنی داشته باشد) با مشخصه `weights` قابل دسترسی هستند."
309 | ]
310 | },
311 | {
312 | "cell_type": "code",
313 | "execution_count": 65,
314 | "metadata": {},
315 | "outputs": [
316 | {
317 | "data": {
318 | "text/plain": [
319 | ""
327 | ]
328 | },
329 | "execution_count": 65,
330 | "metadata": {},
331 | "output_type": "execute_result"
332 | }
333 | ],
334 | "source": [
335 | "model.layers[3].weights[0]"
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": 66,
341 | "metadata": {},
342 | "outputs": [
343 | {
344 | "data": {
345 | "text/plain": [
346 | "array([ 0.97061753, 0.6502549 , 0.2254817 , 0.52415645, 0.98184997,\n",
347 | " 0.6788391 , 1.1129196 , 0.34201714, 0.6771581 , 3.3115928 ,\n",
348 | " 0.83776003, 1.0192714 , 0.30443215, 0.4222344 , -0.2554651 ,\n",
349 | " 0.96727175, 0.90101403, 0.86320865, 0.86195886, 0.7156465 ,\n",
350 | " 1.0060233 , 0.53907305, 1.3887119 , 0.24158667, 0.7830971 ,\n",
351 | " 0.672174 , 3.0135353 , 1.195009 , 1.8365439 , 0.47269213,\n",
352 | " 0.45962656, 1.0022334 ], dtype=float32)"
353 | ]
354 | },
355 | "execution_count": 66,
356 | "metadata": {},
357 | "output_type": "execute_result"
358 | }
359 | ],
360 | "source": [
361 | "base_model.layers[3].get_weights()[0]"
362 | ]
363 | },
364 | {
365 | "cell_type": "markdown",
366 | "metadata": {},
367 | "source": [
368 | "bias های یک لایه هم با مشخصه `bias` قابل دسترسی هستند."
369 | ]
370 | },
371 | {
372 | "cell_type": "code",
373 | "execution_count": 80,
374 | "metadata": {},
375 | "outputs": [
376 | {
377 | "data": {
378 | "text/plain": [
379 | ""
383 | ]
384 | },
385 | "execution_count": 80,
386 | "metadata": {},
387 | "output_type": "execute_result"
388 | }
389 | ],
390 | "source": [
391 | "model.layers[90].bias[0:10]"
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": 82,
397 | "metadata": {},
398 | "outputs": [
399 | {
400 | "data": {
401 | "text/plain": [
402 | ""
416 | ]
417 | },
418 | "execution_count": 82,
419 | "metadata": {},
420 | "output_type": "execute_result"
421 | }
422 | ],
423 | "source": [
424 | "model.layers[90].kernel[0]"
425 | ]
426 | },
427 | {
428 | "cell_type": "markdown",
429 | "metadata": {},
430 | "source": [
431 | "برای دسترسی به یک لایه یک راه دیگر استفاده از نام آن لایه به جای ایندکس است که به خصوص وقتی شبکه ما بزرگ است به کار می آید.\n",
432 | " مثلا فرض کنید می خواهیم به آخرین لایه کانولوشنال شبکه دسترسی پیدا کنیم."
433 | ]
434 | },
435 | {
436 | "cell_type": "code",
437 | "execution_count": 47,
438 | "metadata": {},
439 | "outputs": [
440 | {
441 | "data": {
442 | "text/plain": [
443 | ""
444 | ]
445 | },
446 | "execution_count": 47,
447 | "metadata": {},
448 | "output_type": "execute_result"
449 | }
450 | ],
451 | "source": [
452 | "base_model.get_layer('conv5_block3_3_conv')"
453 | ]
454 | },
455 | {
456 | "cell_type": "markdown",
457 | "metadata": {},
458 | "source": [
459 | "وزن ها به صورت مشابه قابل دسترسی هستند.\n"
460 | ]
461 | },
462 | {
463 | "cell_type": "code",
464 | "execution_count": 48,
465 | "metadata": {},
466 | "outputs": [
467 | {
468 | "data": {
469 | "text/plain": [
470 | "[,\n",
484 | " ]"
487 | ]
488 | },
489 | "execution_count": 48,
490 | "metadata": {},
491 | "output_type": "execute_result"
492 | }
493 | ],
494 | "source": [
495 | "base_model.get_layer('conv5_block3_3_conv').weights"
496 | ]
497 | },
498 | {
499 | "cell_type": "markdown",
500 | "metadata": {},
501 | "source": []
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": 35,
506 | "metadata": {},
507 | "outputs": [
508 | {
509 | "data": {
510 | "text/plain": [
511 | "[]"
512 | ]
513 | },
514 | "execution_count": 35,
515 | "metadata": {},
516 | "output_type": "execute_result"
517 | }
518 | ],
519 | "source": [
520 | "base_model.get_layer('conv5_block1_2_pad').weights "
521 | ]
522 | },
523 | {
524 | "cell_type": "code",
525 | "execution_count": 52,
526 | "metadata": {},
527 | "outputs": [],
528 | "source": [
529 | "#func_model = Model( inputs = base_model, \n",
530 | "#outputs = Dense(10,activation = 'softmax'))\n",
531 | "#func_model.summary()"
532 | ]
533 | },
534 | {
535 | "cell_type": "markdown",
536 | "metadata": {},
537 | "source": [
538 | "#### دسترسی پیدا کردن به ورودی و خروجی لایه ها"
539 | ]
540 | },
541 | {
542 | "cell_type": "code",
543 | "execution_count": 39,
544 | "metadata": {},
545 | "outputs": [
546 | {
547 | "data": {
548 | "text/plain": [
549 | ""
550 | ]
551 | },
552 | "execution_count": 39,
553 | "metadata": {},
554 | "output_type": "execute_result"
555 | }
556 | ],
557 | "source": [
558 | "base_model.get_layer('conv5_block1_2_pad').input"
559 | ]
560 | },
561 | {
562 | "cell_type": "code",
563 | "execution_count": 40,
564 | "metadata": {},
565 | "outputs": [
566 | {
567 | "data": {
568 | "text/plain": [
569 | ""
570 | ]
571 | },
572 | "execution_count": 40,
573 | "metadata": {},
574 | "output_type": "execute_result"
575 | }
576 | ],
577 | "source": [
578 | "base_model.get_layer('conv5_block1_2_pad').output"
579 | ]
580 | },
581 | {
582 | "cell_type": "code",
583 | "execution_count": 41,
584 | "metadata": {},
585 | "outputs": [
586 | {
587 | "data": {
588 | "text/plain": [
589 | ""
590 | ]
591 | },
592 | "execution_count": 41,
593 | "metadata": {},
594 | "output_type": "execute_result"
595 | }
596 | ],
597 | "source": [
598 | "base_model.input"
599 | ]
600 | },
601 | {
602 | "cell_type": "code",
603 | "execution_count": 44,
604 | "metadata": {},
605 | "outputs": [
606 | {
607 | "name": "stdout",
608 | "output_type": "stream",
609 | "text": [
610 | "Model: \"sequential\"\n",
611 | "_________________________________________________________________\n",
612 | "Layer (type) Output Shape Param # \n",
613 | "=================================================================\n",
614 | "dense (Dense) (None, 224, 224, 10) 40 \n",
615 | "=================================================================\n",
616 | "Total params: 40\n",
617 | "Trainable params: 40\n",
618 | "Non-trainable params: 0\n",
619 | "_________________________________________________________________\n"
620 | ]
621 | }
622 | ],
623 | "source": [
624 | "seq_model = Sequential([base_model.input, \n",
625 | "Dense(10,activation = 'softmax')])\n",
626 | "seq_model.summary()"
627 | ]
628 | },
629 | {
630 | "cell_type": "markdown",
631 | "metadata": {},
632 | "source": [
633 | "### فریز کردن وزن های لایه ها"
634 | ]
635 | },
636 | {
637 | "cell_type": "markdown",
638 | "metadata": {},
639 | "source": [
640 | "پیش از این در خصوص فریز کردن وزن های مدل برای انتقال یادگیری به صورت مفصل بحث کردیم. در این جا می خواهیم ببینیم که چطور می توانیم وزن های یک لایه خاص از مدل را فریز کنیم."
641 | ]
642 | },
643 | {
644 | "cell_type": "code",
645 | "execution_count": 115,
646 | "metadata": {},
647 | "outputs": [
648 | {
649 | "name": "stdout",
650 | "output_type": "stream",
651 | "text": [
652 | "WARNING:tensorflow:`input_shape` is undefined or non-square, or `rows` is not in [128, 160, 192, 224]. Weights for input shape (224, 224) will be loaded as the default.\n"
653 | ]
654 | },
655 | {
656 | "data": {
657 | "text/plain": [
658 | ""
659 | ]
660 | },
661 | "execution_count": 115,
662 | "metadata": {},
663 | "output_type": "execute_result"
664 | }
665 | ],
666 | "source": [
667 | "model = tf.keras.applications.MobileNet(weights= 'imagenet',\n",
668 | " input_shape=(150,150,3),\n",
669 | " include_top = False)\n",
670 | "#model.summary()\n",
671 | "model.input"
672 | ]
673 | },
674 | {
675 | "cell_type": "markdown",
676 | "metadata": {},
677 | "source": [
678 | "\n",
679 | "وقتی که یک لایه را به مدل اضافه می کنیم می توانیم با قرار دادن `trainable = False` کاری کنیم که وزن های آن لایه فریز شوند و در زمان آموزش به روز رسانی نشوند.\n",
680 | "\n"
681 | ]
682 | },
683 | {
684 | "cell_type": "code",
685 | "execution_count": 111,
686 | "metadata": {},
687 | "outputs": [
688 | {
689 | "data": {
690 | "text/plain": [
691 | ""
692 | ]
693 | },
694 | "execution_count": 111,
695 | "metadata": {},
696 | "output_type": "execute_result"
697 | }
698 | ],
699 | "source": [
700 | "model.input"
701 | ]
702 | },
703 | {
704 | "cell_type": "code",
705 | "execution_count": 119,
706 | "metadata": {},
707 | "outputs": [],
708 | "source": [
709 | "\n",
710 | " "
711 | ]
712 | },
713 | {
714 | "cell_type": "code",
715 | "execution_count": null,
716 | "metadata": {},
717 | "outputs": [],
718 | "source": [
719 | "model_1 = Sequential([\n",
720 | " model,\n",
721 | " Flatten(),\n",
722 | " Dense(100,activation = 'relu', trainable = False),\n",
723 | " Dense(10,activation = 'softmax')\n",
724 | "])"
725 | ]
726 | },
727 | {
728 | "cell_type": "code",
729 | "execution_count": null,
730 | "metadata": {},
731 | "outputs": [],
732 | "source": []
733 | },
734 | {
735 | "cell_type": "markdown",
736 | "metadata": {},
737 | "source": [
738 | "با استفاده از API فانکشنال به صورت زیر خواهد بود"
739 | ]
740 | },
741 | {
742 | "cell_type": "code",
743 | "execution_count": 120,
744 | "metadata": {},
745 | "outputs": [],
746 | "source": [
747 | "\n",
748 | "\n",
749 | "inputs = model.input #keras.layers.Input(shape = (150,150,3))\n",
750 | "x = model(inputs,training = False)\n",
751 | "x = Flatten()(x)\n",
752 | "x = Dense(128,activation='relu',trainable = False)(x)\n",
753 | "outputs = Dense(5,activation='softmax' )(x)\n",
754 | "final_model = Model(inputs,outputs)"
755 | ]
756 | },
757 | {
758 | "cell_type": "code",
759 | "execution_count": 99,
760 | "metadata": {},
761 | "outputs": [
762 | {
763 | "name": "stdout",
764 | "output_type": "stream",
765 | "text": [
766 | "Model: \"sequential_6\"\n",
767 | "_________________________________________________________________\n",
768 | "Layer (type) Output Shape Param # \n",
769 | "=================================================================\n",
770 | "mobilenet_1.00_224 (Model) (None, None, None, 1024) 3228864 \n",
771 | "_________________________________________________________________\n",
772 | "flatten_5 (Flatten) (None, None) 0 \n",
773 | "=================================================================\n",
774 | "Total params: 3,228,864\n",
775 | "Trainable params: 2,158,400\n",
776 | "Non-trainable params: 1,070,464\n",
777 | "_________________________________________________________________\n"
778 | ]
779 | }
780 | ],
781 | "source": [
782 | "model_1.summary()"
783 | ]
784 | },
785 | {
786 | "cell_type": "markdown",
787 | "metadata": {},
788 | "source": [
789 | "یک راه دیگر هم استفاده از متد `get_layer()` استفاده است."
790 | ]
791 | },
792 | {
793 | "cell_type": "code",
794 | "execution_count": 96,
795 | "metadata": {},
796 | "outputs": [],
797 | "source": [
798 | "model.get_layer('conv_pw_13').trainable = False\n",
799 | "model.get_layer('conv_pw_13_relu').trainable = False"
800 | ]
801 | },
802 | {
803 | "cell_type": "code",
804 | "execution_count": null,
805 | "metadata": {},
806 | "outputs": [],
807 | "source": []
808 | },
809 | {
810 | "cell_type": "markdown",
811 | "metadata": {},
812 | "source": [
813 | "باید مدل را کامپایل کنیم تا این تغییرات اعمال شوند."
814 | ]
815 | },
816 | {
817 | "cell_type": "code",
818 | "execution_count": 87,
819 | "metadata": {},
820 | "outputs": [],
821 | "source": [
822 | "model.compile(loss = 'categorical_crossentropy')"
823 | ]
824 | },
825 | {
826 | "cell_type": "markdown",
827 | "metadata": {},
828 | "source": [
829 | "دقت کنید که این که می توانیم لایه های مختلف شبکه را فریز کنیم به این معنا نیست که این کار کار خوبی است و صرفا به عنوان مثال این کار را انجام دهیم."
830 | ]
831 | },
832 | {
833 | "cell_type": "markdown",
834 | "metadata": {},
835 | "source": [
836 | "### لایه های lambda"
837 | ]
838 | },
839 | {
840 | "cell_type": "code",
841 | "execution_count": null,
842 | "metadata": {},
843 | "outputs": [],
844 | "source": []
845 | },
846 | {
847 | "cell_type": "markdown",
848 | "metadata": {},
849 | "source": [
850 | "### ساخت لایه های اختصاصی"
851 | ]
852 | },
853 | {
854 | "cell_type": "markdown",
855 | "metadata": {},
856 | "source": [
857 | "\n",
858 | "برای ساخت یک لایه کراس سه تابع را باید در زمان ایجاد کلاس این لایه پیاده سازی کنیم:\n",
859 | "\n",
860 | "1. یک تابع که وزن های قابل آموزش مدل را تعریف می کند. (تابع build)\n",
861 | "\n",
862 | "2. یک تابع که منطق لایه را مشخص می کند (تابع call)\n",
863 | "\n",
864 | "3. و در نهایت یک تابع که ابعاد خروجی را محاسبه و تعیین می کند."
865 | ]
866 | },
867 | {
868 | "cell_type": "code",
869 | "execution_count": null,
870 | "metadata": {},
871 | "outputs": [],
872 | "source": []
873 | },
874 | {
875 | "cell_type": "markdown",
876 | "metadata": {},
877 | "source": [
878 | "لایه های تعریف شده باید از کلاس tf.keras.layers.Layers به ارث برده شده باشند."
879 | ]
880 | },
881 | {
882 | "cell_type": "markdown",
883 | "metadata": {},
884 | "source": [
885 | "### پیاده سازی لایه Noisy Relu"
886 | ]
887 | },
888 | {
889 | "cell_type": "code",
890 | "execution_count": 12,
891 | "metadata": {},
892 | "outputs": [],
893 | "source": [
894 | "class NoisyRelu(tf.keras.layers.Layer):\n",
895 | " def __init__(self,**kwargs):\n",
896 | " super(NoisyRelu,self).__init__(**kwargs)\n",
897 | "\n",
898 | " "
899 | ]
900 | },
901 | {
902 | "cell_type": "markdown",
903 | "metadata": {},
904 | "source": [
905 | "اگر به تعریف تابع فعال سازی noisy relu دقت کرده باشید می بینید که این لایه هیچ پارامتر آموزش پذیری ندارد. برای همین نیازی به تابع build نداریم.\n",
906 | "\n"
907 | ]
908 | },
909 | {
910 | "cell_type": "code",
911 | "execution_count": 13,
912 | "metadata": {},
913 | "outputs": [],
914 | "source": [
915 | "def call(self,x):\n",
916 | "    return tf.maximum(0.0, x + tf.random.normal(shape = (1,1)))"
917 | ]
918 | },
919 | {
920 | "cell_type": "code",
921 | "execution_count": 14,
922 | "metadata": {},
923 | "outputs": [],
924 | "source": [
925 | "def compute_output_shape(self,input_shape):\n",
926 | " return input_shape"
927 | ]
928 | },
929 | {
930 | "cell_type": "code",
931 | "execution_count": 25,
932 | "metadata": {},
933 | "outputs": [
934 | {
935 | "data": {
936 | "text/plain": [
937 | ""
938 | ]
939 | },
940 | "execution_count": 25,
941 | "metadata": {},
942 | "output_type": "execute_result"
943 | }
944 | ],
945 | "source": [
946 | "tf.random.normal(shape = (1,1))"
947 | ]
948 | },
949 | {
950 | "cell_type": "markdown",
951 | "metadata": {},
952 | "source": [
953 | "حالا می توانیم همه این ها را کنار هم قرار دهیم."
954 | ]
955 | },
956 | {
957 | "cell_type": "code",
958 | "execution_count": 30,
959 | "metadata": {},
960 | "outputs": [],
961 | "source": [
962 | "class NoisyRelu(tf.keras.layers.Layer):\n",
963 | " def __init__(self,**kwargs):\n",
964 | " super(NoisyRelu,self).__init__(**kwargs)\n",
965 | " def call(self,x):\n",
966 | " return tf.maximum(0.0, x + tf.random.normal(shape = (1,1)))\n",
967 | " def compute_output_shape(self,input_shape):\n",
968 | " return input_shape"
969 | ]
970 | },
971 | {
972 | "cell_type": "markdown",
973 | "metadata": {},
974 | "source": [
975 | "#### آموزش و امتحان لایه جدید ساخته شده"
976 | ]
977 | },
978 | {
979 | "cell_type": "code",
980 | "execution_count": 34,
981 | "metadata": {},
982 | "outputs": [],
983 | "source": [
984 | "model = Sequential()\n",
985 | "model.add(Conv2D(filters = 15,kernel_size=3, input_shape = (28,28,1)))\n",
986 | "model.add(NoisyRelu())\n",
987 | "model.add(MaxPooling2D(pool_size=(2,2)))\n",
988 | "model.add(Flatten())\n",
989 | "model.add(Dense(100,activation='relu'))\n",
990 | "model.add(Dense(10,activation='softmax'))"
991 | ]
992 | },
993 | {
994 | "cell_type": "code",
995 | "execution_count": 35,
996 | "metadata": {},
997 | "outputs": [
998 | {
999 | "name": "stdout",
1000 | "output_type": "stream",
1001 | "text": [
1002 | "Model: \"sequential_6\"\n",
1003 | "_________________________________________________________________\n",
1004 | "Layer (type) Output Shape Param # \n",
1005 | "=================================================================\n",
1006 | "conv2d_6 (Conv2D) (None, 26, 26, 15) 150 \n",
1007 | "_________________________________________________________________\n",
1008 | "noisy_relu_6 (NoisyRelu) (None, 26, 26, 15) 0 \n",
1009 | "_________________________________________________________________\n",
1010 | "max_pooling2d_1 (MaxPooling2 (None, 13, 13, 15) 0 \n",
1011 | "_________________________________________________________________\n",
1012 | "flatten_1 (Flatten) (None, 2535) 0 \n",
1013 | "_________________________________________________________________\n",
1014 | "dense (Dense) (None, 100) 253600 \n",
1015 | "_________________________________________________________________\n",
1016 | "dense_1 (Dense) (None, 10) 1010 \n",
1017 | "=================================================================\n",
1018 | "Total params: 254,760\n",
1019 | "Trainable params: 254,760\n",
1020 | "Non-trainable params: 0\n",
1021 | "_________________________________________________________________\n"
1022 | ]
1023 | }
1024 | ],
1025 | "source": [
"# Inspect the architecture and parameter counts of the model built above.\n",
"model.summary()"
1027 | ]
1028 | },
1029 | {
1030 | "cell_type": "code",
1031 | "execution_count": null,
1032 | "metadata": {},
1033 | "outputs": [],
1034 | "source": []
1035 | },
1036 | {
1037 | "cell_type": "markdown",
1038 | "metadata": {},
1039 | "source": [
1040 | "### پیاده سازی لایه Attention"
1041 | ]
1042 | },
1043 | {
1044 | "cell_type": "markdown",
1045 | "metadata": {},
1046 | "source": [
1047 | "حالا نوبت این است که تابع build را تعریف کنیم.\n",
1048 | "\n"
1049 | ]
1050 | },
1051 | {
1052 | "cell_type": "code",
1053 | "execution_count": null,
1054 | "metadata": {},
1055 | "outputs": [],
1056 | "source": [
"def build(self, input_shape):\n",
"    # Skeleton of a custom layer's build(): a real implementation creates\n",
"    # the trainable weights here via self.add_weight(...). The original\n",
"    # cell was left unfinished (it ended in `self.`), which is a\n",
"    # SyntaxError and breaks Restart-and-Run-All; this stub parses cleanly.\n",
"    super().build(input_shape)"
1059 | ]
1060 | },
1061 | {
1062 | "cell_type": "code",
1063 | "execution_count": null,
1064 | "metadata": {},
1065 | "outputs": [],
1066 | "source": [
"class ScaledDotProductAttention(Layer):\n",
"    \"\"\"Scaled dot-product attention.\n",
"\n",
"    Implementation according to:\n",
"    \"Attention is all you need\" by A. Vaswani, N. Shazeer, N. Parmar (2017).\n",
"\n",
"    Expects a list of three tensors [Q, K, V], each shaped\n",
"    (batch, length, features).\n",
"    NOTE(review): relies on `Layer`, the Keras backend `K`, and `np`\n",
"    being imported elsewhere in the notebook -- confirm before running.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, return_attention=False, **kwargs):\n",
"        # When True, call() returns [output, attention_weights].\n",
"        self._return_attention = return_attention\n",
"        self.supports_masking = True\n",
"        super(ScaledDotProductAttention, self).__init__(**kwargs)\n",
"\n",
"    def compute_output_shape(self, input_shape):\n",
"        self._validate_input_shape(input_shape)\n",
"\n",
"        if not self._return_attention:\n",
"            # The output takes V's feature dimension; lengths of Q and V\n",
"            # are validated equal, so V's shape is the output shape.\n",
"            return input_shape[-1]\n",
"        else:\n",
"            # Attention weights are (batch, len(Q), len(K)). Bug fix: the\n",
"            # original used input_shape[1][2] (K's feature dim) for the\n",
"            # last axis instead of K's length, input_shape[1][1].\n",
"            return [input_shape[-1], [input_shape[0][0], input_shape[0][1], input_shape[1][1]]]\n",
"\n",
"    def _validate_input_shape(self, input_shape):\n",
"        # input_shape must be a list of exactly three shapes: [Q, K, V].\n",
"        if len(input_shape) != 3:\n",
"            raise ValueError(\"Layer received an input shape {0} but expected three inputs (Q, K, V).\".format(input_shape))\n",
"        else:\n",
"            if input_shape[0][0] != input_shape[1][0] or input_shape[1][0] != input_shape[2][0]:\n",
"                raise ValueError(\"All three inputs (Q, K, V) have to have the same batch size; received batch sizes: {0}, {1}, {2}\".format(input_shape[0][0], input_shape[1][0], input_shape[2][0]))\n",
"            # Bug fix: the original message formatted the batch sizes\n",
"            # ([i][0]) here instead of the mismatching lengths ([i][1]).\n",
"            if input_shape[0][1] != input_shape[1][1] or input_shape[1][1] != input_shape[2][1]:\n",
"                raise ValueError(\"All three inputs (Q, K, V) have to have the same length; received lengths: {0}, {1}, {2}\".format(input_shape[0][1], input_shape[1][1], input_shape[2][1]))\n",
"            # Q and K must share a feature dimension for the dot product.\n",
"            if input_shape[0][2] != input_shape[1][2]:\n",
"                raise ValueError(\"Feature dimensions of Q {0} and K {1} do not match.\".format(input_shape[0], input_shape[1]))\n",
"\n",
"    def build(self, input_shape):\n",
"        self._validate_input_shape(input_shape)\n",
"        super(ScaledDotProductAttention, self).build(input_shape)\n",
"\n",
"    def call(self, x, mask=None):\n",
"        # x is a list of three tensors: queries, keys, values.\n",
"        q, k, v = x\n",
"        d_k = q.shape.as_list()[2]\n",
"\n",
"        # Raw attention scores: dot product of every query with every key,\n",
"        # giving a (batch, len(Q), len(K)) matrix.\n",
"        # in pure tensorflow:\n",
"        # weights = tf.matmul(x_batch, tf.transpose(y_batch, perm=[0, 2, 1]))\n",
"        # normalized_weights = tf.nn.softmax(weights/scaling)\n",
"        # output = tf.matmul(normalized_weights, x_batch)\n",
"        weights = K.batch_dot(q, k, axes=[2, 2])\n",
"\n",
"        if mask is not None:\n",
"            if isinstance(mask, (list, tuple)):\n",
"                # Bug fix: the original raised for any non-empty list\n",
"                # (len(mask) > 0), so every list-valued mask was rejected\n",
"                # and the unwrap below was unreachable. Only a list that\n",
"                # does not contain exactly one tensor is invalid.\n",
"                if len(mask) != 1:\n",
"                    raise ValueError(\"mask can only be a Tensor or a list of length 1 containing a tensor.\")\n",
"                mask = mask[0]\n",
"\n",
"            # Push masked-out positions toward -inf so softmax gives them ~0.\n",
"            weights += -1e10*(1-mask)\n",
"\n",
"        # Scale by sqrt(d_k) before the softmax, as in the paper.\n",
"        normalized_weights = K.softmax(weights / np.sqrt(d_k))\n",
"        output = K.batch_dot(normalized_weights, v)\n",
"\n",
"        if self._return_attention:\n",
"            return [output, normalized_weights]\n",
"        else:\n",
"            return output"
1130 | ]
1131 | },
1132 | {
1133 | "cell_type": "code",
1134 | "execution_count": null,
1135 | "metadata": {},
1136 | "outputs": [],
1137 | "source": []
1138 | },
1139 | {
1140 | "cell_type": "markdown",
1141 | "metadata": {},
1142 | "source": [
1143 | "### منابع"
1144 | ]
1145 | },
1146 | {
1147 | "cell_type": "markdown",
1148 | "metadata": {},
1149 | "source": [
"- https://github.com/zimmerrol/keras-utility-layer-collection/blob/master/kulc/layer_normalization.py\n",
"- https://github.com/zimmerrol/keras-utility-layer-collection\n",
"- https://github.com/Zelgunn/CustomKerasLayers"
1153 | ]
1154 | }
1155 | ],
1156 | "metadata": {
1157 | "kernelspec": {
1158 | "display_name": "Python 3",
1159 | "language": "python",
1160 | "name": "python3"
1161 | },
1162 | "language_info": {
1163 | "codemirror_mode": {
1164 | "name": "ipython",
1165 | "version": 3
1166 | },
1167 | "file_extension": ".py",
1168 | "mimetype": "text/x-python",
1169 | "name": "python",
1170 | "nbconvert_exporter": "python",
1171 | "pygments_lexer": "ipython3",
1172 | "version": "3.7.4"
1173 | }
1174 | },
1175 | "nbformat": 4,
1176 | "nbformat_minor": 4
1177 | }
1178 |
--------------------------------------------------------------------------------