├── LICENSE ├── README.md ├── _config.yml ├── chap10 ├── README.md ├── data │ ├── collect_data.py │ ├── downscale.py │ ├── food_augmentation.py │ ├── gen_label_list.py │ ├── image_urls │ │ ├── 000_urls.txt │ │ ├── 001_urls.txt │ │ ├── 002_urls.txt │ │ ├── 003_urls.txt │ │ ├── 004_urls.txt │ │ ├── 005_urls.txt │ │ ├── 006_urls.txt │ │ └── fig-10.7.txt │ ├── keywords.txt │ ├── link_data_augmentation.sh │ ├── remove_dups_from_list.py │ ├── remove_invalid_images.py │ └── sample_val.py ├── download_resnet10_cvgj_weights.sh ├── food_resnet_10_cvgj_deploy.prototxt ├── food_resnet_10_cvgj_finetune_val.prototxt ├── kaoya_shuizhurou_roc_auc.py ├── make_confusion_matrix.py ├── recognize_food.py ├── solver.prototxt ├── sort_kaoya_by_pred_prob.py ├── val_results.txt └── visualize_activation.py ├── chap11 ├── README.md └── prepare_voc_data.sh ├── chap12 ├── README.md ├── gen_pairwise_imglist.py ├── mnist_siamese.prototxt ├── mnist_siamese_solver.prototxt ├── mnist_siamese_train_val.prototxt └── visualize_result.py ├── chap5 ├── README.md ├── bar_n_pie_chart.py ├── fit_data.py ├── scatter_3d.py ├── surface_3d.py └── three_doors.py ├── chap6 ├── README.md ├── bbox_labeling │ ├── bbox_labeling.py │ ├── detection_anno_bbox2voc.py │ └── samples.labels └── data_augmentation │ ├── image_augmentation.py │ ├── run_augmentation.py │ └── run_augmentation_pool_map.py ├── chap7 ├── README.md ├── caffe │ ├── gen_hdf5.py │ ├── simple_mlp_test.py │ ├── simple_mlp_train.py │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt ├── gen_data.py └── mxnet │ └── simple_mlp.py ├── chap8 ├── README.md ├── caffe │ ├── lenet.prototxt │ ├── lenet_solver.prototxt │ ├── lenet_solver_aug.prototxt │ ├── lenet_test.prototxt │ ├── lenet_train_val.prototxt │ ├── lenet_train_val_aug.prototxt │ └── recognize_digit.py ├── data │ ├── convert_mnist.py │ ├── download_mnist.sh │ ├── gen_caffe_imglist.py │ └── gen_mxnet_imglist.py └── mxnet │ ├── benchmark_model.py │ ├── recognize_digit.py │ ├── score_model.py │ └── train_lenet5.py ├── chap9 ├── README.md ├── deploy.prototxt ├── gen_hdf5.py ├── gen_label.py ├── make_noises.py ├── predict.py ├── solver.prototxt ├── train_val.prototxt └── visualize_conv1_kernels.py ├── errata.pdf ├── random_bonus ├── README.md ├── adversarial_example_caffe │ ├── README.md │ ├── adversarial_example_demo.py │ ├── download-squeezenet-v1.0-weights.sh │ ├── little_white_dog.jpg │ ├── squeezenet-v1.0-deploy-with-force-backward.prototxt │ └── synset_words.txt ├── gan_n_cgan_2d_example │ ├── LICENSE │ ├── README.md │ ├── argparser.py │ ├── cgan_demo.py │ ├── gan_demo.py │ ├── inputs │ │ ├── U.jpg │ │ ├── Z.jpg │ │ ├── batman.jpg │ │ ├── binary.jpg │ │ ├── binary │ │ │ ├── 0.jpg │ │ │ └── 1.jpg │ │ ├── circle.jpg │ │ ├── dumbbell.jpg │ │ ├── penta.jpg │ │ ├── penta │ │ │ ├── 0.jpg │ │ │ ├── 1.jpg │ │ │ ├── 2.jpg │ │ │ ├── 3.jpg │ │ │ └── 4.jpg │ │ ├── random.jpg │ │ ├── triangle.jpg │ │ ├── vortex.jpg │ │ ├── vortex │ │ │ ├── 0.jpg │ │ │ ├── 1.jpg │ │ │ └── 2.jpg │ │ └── zig.jpg │ ├── networks.py │ ├── sampler.py │ └── visualizer.py ├── generate_mosaic_for_porno_images │ ├── README.md │ ├── clone_open_nsfw.sh │ ├── crop_n_resize.py │ ├── deploy_global_pooling.prototxt │ └── gen_mosaic.py ├── great-circle-interp │ ├── README.md │ ├── distance-experiment.py │ ├── latent-walk-great-circle.py │ ├── latent-walk-slerp-vs-lerp.py │ └── networks.py ├── image-segmentation(updating) │ ├── README.md │ ├── argparser.py │ ├── example.cfg │ ├── loss_visualizer.py │ ├── main.py │ ├── networks.py │ └── utils.py └── 
multiple_models_fusion_caffe │ ├── README.md │ ├── convert_mnist.py │ ├── download_mnist.sh │ ├── fuse_model.py │ ├── gen_img_list.py │ ├── lenet_even_solver.prototxt │ ├── lenet_even_train_val.prototxt │ ├── lenet_fusion_solver.prototxt │ ├── lenet_fusion_train_val.prototxt │ ├── lenet_odd_solver.prototxt │ ├── lenet_odd_train_val.prototxt │ └── rename_n_freeze_layers.py └── reference.pdf /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, frombeijingwithlove 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Companion code for *Deep Learning and Computer Vision* (《深度学习与计算机视觉》)
2 | ===
3 | Thanks to everyone for the questions raised in the issues. This repo has gone without updates for a long time; I had planned to concentrate on a round of updates before Spring Festival, but after going through everything I hardly knew where to start, and since the publisher's editors and I have agreed that there will most likely be no second edition or reprint, I decided to let it go...
4 | 
5 | One more error I found myself is in the part about eigenvalues: SVD is not for non-positive-definite matrices, it is for non-symmetric matrices. See the updated Zhihu answer for details:
6 | https://www.zhihu.com/question/20507061/answer/120540926
7 | The remaining corrections are either already in errata.pdf, or covered by the issue discussions, which are essentially correct.
8 | 
9 | This repo is not expected to be updated any further (frankly, the second half of the book is already outdated). If you really run into problems, send me a private message on GitHub: yeyun11
10 | 
11 | Happy Spring Festival to all. Lunar New Year's Eve, 2020
12 | 
13 | _
14 | ===
15 | 
16 | ![cover](https://raw.githubusercontent.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/master/fm.jpg)
17 | 
18 | Originally titled *Deep Learning and Computer Vision: An Introduction by Examples* (《深度学习与计算机视觉:实例入门》). Please note: **this book is positioned as an introductory text**.
19 | 
20 | The code is [here](https://github.com/frombeijingwithlove/dlcv_for_beginners). Electronic versions of all color figures can be downloaded [here](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/tree/master/figs_n_plots); for the color figures of Chapters 5 and 6, see the online versions: [Chapter 5, part 1](https://zhuanlan.zhihu.com/p/24162430), [Chapter 5, part 2](https://zhuanlan.zhihu.com/p/24309547), [Chapter 6](https://zhuanlan.zhihu.com/p/24425116).
21 | 
22 | For reasons I cannot understand, the publisher insisted that the English in the book be translated. As a result: 1) some of the English was translated into Chinese to varying degrees, and 2) the reference list, most of which is English literature, could not be included in the book. The reference list is [here](https://github.com/frombeijingwithlove/dlcv_for_beginners/blob/master/reference.pdf).
23 | 
24 | Please report content errors [here](https://github.com/frombeijingwithlove/dlcv_for_beginners/issues). The errata are [here](https://github.com/frombeijingwithlove/dlcv_for_beginners/blob/master/errata.pdf).
25 | 
26 | Purchase links: [JD](https://item.jd.com/12152559.html), [Amazon](https://www.amazon.cn/gp/product/B074JWSF99), [Dangdang](http://product.dangdang.com/25138676.html)
27 | 
28 | ## Quick code index
29 | [Chapter 5: numpy and matplotlib visualization examples](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap5)
30 | [Chapter 6: a small bounding-box annotation tool and a local data-augmentation tool](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap6)
31 | [Chapter 7: classification on the 2D plane, with both Caffe and MXNet](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap7)
32 | [Chapter 8: MNIST classification, with both Caffe and MXNet](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap8)
33 | [Chapter 9: regressing the degree of image disorder with Caffe, plus visualization of convolution kernels](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap9)
34 | [Chapter 10: transfer learning a food classifier from an ImageNet pretrained model, with confusion matrix, ROC curves, and class activation visualization, based on Caffe](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap10)
35 | [Chapter 12: training a Siamese network on MNIST, with t-SNE visualization, based on Caffe](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap12)
36 | [Odds and ends not covered in the book: crafting adversarial examples (Caffe), 2D GAN and CGAN with training-process visualization (PyTorch), automatic mosaics for porn images (Caffe), model fusion (Caffe), and image segmentation (PyTorch)](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/random_bonus)
37 | [Model pruning (PyTorch)](https://github.com/yeyun11/pytorch-network-slimming)
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-minimal
--------------------------------------------------------------------------------
/chap10/README.md:
--------------------------------------------------------------------------------
1 | ## Chapter 10: Transfer Learning and Model Fine-tuning
2 | 
3 | Download images for each keyword from a search engine, train a model by fine-tuning, and finally evaluate, analyze, and visualize it.
4 | 
5 | See Chapter 10 of the book for detailed usage.
6 | 
7 | The pretrained model for the example in the book can be downloaded from:
8 | https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/food_resnet-10_iter_10000.caffemodel
9 | or
10 | http://pan.baidu.com/s/1jHRLsLw
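11 | 
12 | A rough sketch of the usual order of the data-preparation steps, inferred from the scripts under data/ rather than taken from the book (the short-edge size 256 below is illustrative):
13 | 
14 | > cd data
15 | > python collect_data.py
16 | 
17 | crawls images for each keyword in keywords.txt into 000/ ... 006/
18 | 
19 | > python remove_invalid_images.py .
20 | > python downscale.py . 256
21 | 
22 | drop files that OpenCV cannot decode, then shrink any image whose short edge exceeds 256 pixels. After gathering the class folders into train/:
23 | 
24 | > python sample_val.py
25 | > sh link_data_augmentation.sh
26 | > python food_augmentation.py
27 | > python gen_label_list.py train
28 | > python gen_label_list.py val
29 | 
30 | moves 300 images per class into val/, links the chap6 augmentation tools, tops every class in train/ up to 3000 samples, and writes the train.txt/val.txt label lists. remove_dups_from_list.py additionally expects a text file listing groups of duplicate images (one group per line, first entry kept), produced by an external duplicate finder.
--------------------------------------------------------------------------------
/chap10/data/collect_data.py: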
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import urllib
4 | from multiprocessing import Process
5 | 
6 | SUPPORTED_FORMATS = ['jpg', 'png', 'jpeg']
7 | URL_TEMPLATE = r'http://image.b***u.com/search/flip?tn=b***uimage&ie=utf-8&word={keyword}&pn={index}'
8 | 
9 | def download_images_from_b***u(dir_name, keyword, start_index, end_index):
10 |     index = start_index
11 |     while index < end_index:
12 |         url = URL_TEMPLATE.format(keyword=keyword, index=index)
13 |         try:
14 |             html_text = urllib.urlopen(url).read().decode('utf-8', 'ignore')
15 |             image_urls = re.findall(r'"objURL":"(.*?)"', html_text)
16 |             if not image_urls:
17 |                 print('Cannot retrieve any more image urls from {}\nStopping ...'.format(url))
18 |                 break
19 |         except IOError as e:
20 |             print(e)
21 |             print('Cannot open {}. \nStopping ...'.format(url))
22 |             break
23 | 
24 |         downloaded_urls = []
25 |         for url in image_urls:
26 |             filename = url.split('/')[-1]
27 |             ext = filename[filename.rfind('.')+1:]
28 |             if ext.lower() not in SUPPORTED_FORMATS:
29 |                 index += 1
30 |                 continue
31 |             filename = '{}/{:0>6d}.{}'.format(dir_name, index, ext)
32 |             cmd = 'wget "{}" -t 3 -T 5 -O {}'.format(url, filename)
33 |             os.system(cmd)
34 | 
35 |             if os.path.exists(filename) and os.path.getsize(filename) > 1024:
36 |                 index_url = '{:0>6d},{}'.format(index, url)
37 |                 downloaded_urls.append(index_url)
38 |             else:
39 |                 os.system('rm {}'.format(filename))
40 | 
41 |             index += 1
42 |             if index >= end_index:
43 |                 break
44 | 
45 |         with open('{}_urls.txt'.format(dir_name), 'a') as furls:
46 |             urls_text = '{}\n'.format('\n'.join(downloaded_urls))
47 |             if len(urls_text) > 11:
48 |                 furls.write(urls_text)
49 | 
50 | def download_images(keywords, num_per_kw, procs_per_kw):
51 |     args_list = []
52 |     for class_id, keyword in enumerate(keywords):
53 |         dir_name = '{:0>3d}'.format(class_id)
54 |         os.system('mkdir -p {}'.format(dir_name))
55 |         # divide before converting: float(num_per_kw/procs_per_kw) truncates first under Python 2
56 |         num_per_proc = int(round(float(num_per_kw) / procs_per_kw))
57 |         for i in range(procs_per_kw):
58 |             start_index = i * num_per_proc
59 |             end_index = start_index + num_per_proc - 1
60 |             args_list.append((dir_name, keyword, start_index, end_index))
61 | 
62 |     processes = [Process(target=download_images_from_b***u, args=x) for x in args_list]
63 | 
64 |     print('Starting to download images with {} processes ...'.format(len(processes)))
65 | 
66 |     for p in processes:
67 |         p.start()
68 | 
69 |     for p in processes:
70 |         p.join()
71 | 
72 |     print('Done!')
73 | 
74 | if __name__ == "__main__":
75 |     with open('keywords.txt', 'rb') as f:
76 |         foods = f.read().split()
77 |     download_images(foods, 2000, 3)
78 | 
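79 | # NOTE (editor's sketch, not from the book): this script targets Python 2,
80 | # where urllib.urlopen exists. A rough Python 3 equivalent of the fetch above:
81 | #   from urllib.request import urlopen
82 | #   html_text = urlopen(url).read().decode('utf-8', 'ignore')
--------------------------------------------------------------------------------
/chap10/data/downscale.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import sys
4 | 
5 | input_path = sys.argv[1].rstrip(os.sep)
6 | target_short_edge = int(sys.argv[2])
7 | 
8 | for root, dirs, files in os.walk(input_path):
9 |     print('scanning {} ...'.format(root))
10 |     for filename in files:
11 |         filepath = os.sep.join([root, filename])
12 | 
13 |         img = cv2.imread(filepath)
14 |         h, w = img.shape[:2]
15 |         short_edge = min(w, h)
16 | 
17 |         if short_edge > target_short_edge:
18 |             scale = float(target_short_edge) / float(short_edge)
19 |             new_w = int(round(w*scale))
20 |             new_h = int(round(h*scale))
21 |             print('Down sampling {} from {}x{} to {}x{} ...'.format(
22 |                 filepath, w, h, new_w, new_h
23 |             ))
24 |             img = cv2.resize(img, (new_w, new_h))
25 |             cv2.imwrite(filepath,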
img) 26 | 27 | print('Done!') 28 | -------------------------------------------------------------------------------- /chap10/data/food_augmentation.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | n_total = 3000 4 | 5 | class_dirs = os.listdir('train') 6 | 7 | for class_dir in class_dirs: 8 | src_path = 'train/{}'.format(class_dir) 9 | n_samples = len(os.listdir(src_path)) 10 | n_aug = n_total - n_samples 11 | cmd = 'python run_augmentation.py {} temp {}'.format(src_path, n_aug) 12 | os.system(cmd) 13 | cmd = 'mv temp/* {}'.format(src_path) 14 | os.system(cmd) 15 | 16 | os.system('rm -r temp') 17 | -------------------------------------------------------------------------------- /chap10/data/gen_label_list.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | dataset = sys.argv[1].rstrip(os.sep) 5 | 6 | class_dirs = os.listdir(dataset) 7 | 8 | with open('{}.txt'.format(dataset), 'w') as f: 9 | for class_dir in class_dirs: 10 | class_path = os.sep.join([dataset, class_dir]) 11 | label = int(class_dir) 12 | lines = ['{}/{} {}'.format(class_path, x, label) for x in os.listdir(class_path)] 13 | f.write('\n'.join(lines) + '\n') 14 | -------------------------------------------------------------------------------- /chap10/data/image_urls/fig-10.7.txt: -------------------------------------------------------------------------------- 1 | http://www.seelvyou.com/uploadfile/jingqu/beijing/beijing33.jpg 2 | http://pic51.huitu.com/res/20160201/859521_20160201110841329500_1.jpg 3 | http://www.wazsjg.com/image-pic/bd729074.jpg.jpg 4 | -------------------------------------------------------------------------------- /chap10/data/keywords.txt: -------------------------------------------------------------------------------- 1 | 烤鸭 2 | 羊肉串 3 | 水煮肉 4 | 鸡汤 5 | 麻小 6 | 面条 7 | 包子 8 | -------------------------------------------------------------------------------- /chap10/data/link_data_augmentation.sh: -------------------------------------------------------------------------------- 1 | ln -s ../../chap6/data_augmentation/run_augmentation.py run_augmentation.py 2 | ln -s ../../chap6/data_augmentation/image_augmentation.py image_augmentation.py 3 | 4 | -------------------------------------------------------------------------------- /chap10/data/remove_dups_from_list.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | dup_list = sys.argv[1] 5 | 6 | with open(dup_list, 'r') as f: 7 | lines = f.readlines() 8 | for line in lines: 9 | dups = line.split() 10 | print('Removing duplicates of {}'.format(dups[0])) 11 | for dup in dups[1:]: 12 | cmd = 'rm {}'.format(dup) 13 | os.system(cmd) 14 | -------------------------------------------------------------------------------- /chap10/data/remove_invalid_images.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | from collect_data import SUPPORTED_FORMATS 5 | 6 | input_path = sys.argv[1] 7 | 8 | for root, dirs, files in os.walk(input_path): 9 | for filename in files: 10 | ext = filename[filename.rfind('.')+1:].lower() 11 | if ext not in SUPPORTED_FORMATS: 12 | continue 13 | filepath = os.sep.join([root, filename]) 14 | if cv2.imread(filepath) is None: 15 | os.system('rm {}'.format(filepath)) 16 | print('{} is not a valid image file. 
Deleted!'.format(filepath))
17 | 
--------------------------------------------------------------------------------
/chap10/data/sample_val.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | 
4 | N = 300
5 | 
6 | os.system('mkdir -p val')
7 | class_dirs = os.listdir('train')
8 | 
9 | for class_dir in class_dirs:
10 |     os.system('mkdir -p val/{}'.format(class_dir))
11 |     root = 'train/{}'.format(class_dir)
12 |     print('Sampling validation set with {} images from {} ...'.format(N, root))
13 |     filenames = os.listdir(root)
14 |     random.shuffle(filenames)
15 |     val_filenames = filenames[:N]
16 |     for filename in val_filenames:
17 |         src_filepath = os.sep.join([root, filename])
18 |         dst_filepath = os.sep.join(['val', class_dir, filename])
19 |         cmd = 'mv {} {}'.format(src_filepath, dst_filepath)
20 |         os.system(cmd)
21 | 
--------------------------------------------------------------------------------
/chap10/download_resnet10_cvgj_weights.sh:
--------------------------------------------------------------------------------
1 | wget https://upload.uni-jena.de/data/58493041de6f79.63214979/resnet10_cvgj_iter_320000.caffemodel
2 | 
3 | 
--------------------------------------------------------------------------------
/chap10/kaoya_shuizhurou_roc_auc.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from sklearn.metrics import confusion_matrix, roc_curve, auc
3 | 
4 | result_filepath = 'val_results.txt'
5 | 
6 | # the indices of ky & szr are 0 and 2, respectively
7 | is_ky = []
8 | pred_ky = []
9 | is_szr = []
10 | pred_szr = []
11 | ky_scores = []
12 | szr_scores = []
13 | with open(result_filepath, 'r') as f:
14 |     lines = f.readlines()
15 |     for line in lines:
16 |         tokens = line.split()
17 |         true_label = int(tokens[1])
18 |         pred_label = int(tokens[2])
19 |         ky_prob = float(tokens[3])
20 |         szr_prob = float(tokens[5])
21 | 
22 |         is_ky.append(1 if true_label == 0 else 0)
23 |         pred_ky.append(1 if pred_label == 0 else 0)
24 |         ky_scores.append(ky_prob)
25 | 
26 |         is_szr.append(1 if true_label == 2 else 0)
27 |         szr_scores.append(szr_prob)
28 | 
29 | ky_cnf_mat = confusion_matrix(is_ky, pred_ky, labels=[1, 0])
30 | print(ky_cnf_mat)
31 | 
32 | ky_fpr, ky_tpr, ky_ths = roc_curve(is_ky, ky_scores)
33 | ky_auc = auc(ky_fpr, ky_tpr)
34 | 
35 | szr_fpr, szr_tpr, szr_ths = roc_curve(is_szr, szr_scores)
36 | szr_auc = auc(szr_fpr, szr_tpr)
37 | 
38 | plt.plot(ky_fpr, ky_tpr, 'k--', lw=2,
39 |          label='Kao Ya ROC curve (auc = {:.2f})'.format(ky_auc))
40 | plt.plot(szr_fpr, szr_tpr, 'b-.', lw=2,
41 |          label='Shui Zhu Rou ROC curve (auc = {:.2f})'.format(szr_auc))
42 | plt.plot([0, 1], [0, 1], 'k', lw=1)
43 | plt.plot([0, 0, 1], [0, 1, 1], 'k:', lw=2)
44 | plt.xlim([-0.02, 1.0])
45 | plt.ylim([0.0, 1.02])
46 | plt.xlabel('False Positive Rate', fontsize=16)
47 | plt.ylabel('True Positive Rate', fontsize=16)
48 | plt.title('Receiver operating characteristic example')
49 | plt.legend(loc="lower right")
50 | plt.show()
51 | 
--------------------------------------------------------------------------------
/chap10/make_confusion_matrix.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from sklearn.metrics import confusion_matrix
5 | 
6 | def plot_confusion_matrix(cm, classes,
7 |                           normalize=False,
8 |                           title='Confusion matrix',
9 |                           cmap=plt.cm.Blues):
10 | 
11 |     # normalize before drawing, so the heatmap matches the printed values
12 |     if normalize:
13 |         cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
14 |         print("Normalized confusion matrix")
15 |     else:
16 |         print('Confusion matrix, without normalization')
17 | 
18 |     print(cm)
19 | 
20 |     plt.imshow(cm, interpolation='nearest', cmap=cmap)
21 |     plt.title(title)
22 |     plt.colorbar()
23 |     tick_marks = np.arange(len(classes))
24 |     plt.xticks(tick_marks, classes, rotation=45)
25 |     plt.yticks(tick_marks, classes)
26 | 
27 |     thresh = cm.max() / 2.
28 |     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
29 |         plt.text(j, i, cm[i, j],
30 |                  horizontalalignment="center",
31 |                  color="white" if cm[i, j] > thresh else "black")
32 | 
33 |     plt.tight_layout()
34 |     plt.ylabel('True label')
35 |     plt.xlabel('Predicted label')
36 | 
37 | result_filepath = 'val_results.txt'
38 | 
39 | true_labels = []
40 | pred_labels = []
41 | n_correct = 0
42 | with open(result_filepath, 'r') as f:
43 |     lines = f.readlines()
44 |     for line in lines:
45 |         tokens = line.split()
46 |         true_label = int(tokens[1])
47 |         pred_label = int(tokens[2])
48 |         true_labels.append(true_label)
49 |         pred_labels.append(pred_label)
50 |         n_correct += 1 if true_label == pred_label else 0
51 | 
52 | print('Accuracy = {:.2f}%'.format(float(n_correct)/float(len(true_labels))*100))
53 | cnf_mat = confusion_matrix(true_labels, pred_labels)
54 | foods = ['kaoya', 'yangrouchuan', 'shuizhurou', 'jitang', 'maxiao', 'miantiao', 'baozi']
55 | plot_confusion_matrix(cnf_mat, classes=foods)
56 | plt.show()
57 | 
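58 | # A row-normalized view of the same matrix (editor's sketch, using the
59 | # function's own normalize flag; uncomment to try):
60 | #plot_confusion_matrix(cnf_mat, classes=foods, normalize=True,
61 | #                      title='Normalized confusion matrix')
62 | #plt.show()
--------------------------------------------------------------------------------
/chap10/recognize_food.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import numpy as np
3 | sys.path.append('/path/to/caffe/python')
4 | import caffe
5 | 
6 | WEIGHTS_FILE = 'food_resnet-10_iter_10000.caffemodel'
7 | DEPLOY_FILE = 'food_resnet_10_cvgj_deploy.prototxt'
8 | 
9 | #caffe.set_mode_cpu()
10 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST)
11 | 
12 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
13 | transformer.set_transpose('data', (2, 0, 1))
14 | transformer.set_raw_scale('data', 255)
15 | transformer.set_channel_swap('data', (2, 1, 0))
16 | 
17 | image_list = sys.argv[1]
18 | result_list = '{}_results.txt'.format(image_list[:image_list.rfind('.')])
19 | 
20 | foods = open('/path/to/keywords.txt', 'rb').read().split()
21 | with open(image_list, 'r') as f, open(result_list, 'w') as f_ret:
22 |     for line in f.readlines():
23 |         filepath, label = line.split()
24 |         label = int(label)
25 |         image = caffe.io.load_image(filepath)
26 |         transformed_image = transformer.preprocess('data', image)
27 |         net.blobs['data'].data[...]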
= transformed_image 28 | 29 | output = net.forward() 30 | probs = output['prob'][0] 31 | pred = np.argmax(probs) 32 | 33 | print('{}, predicted: {}, true: {}'.format(filepath, foods[pred], foods[label])) 34 | result_line = '{} {} {} {}\n'.format(filepath, label, pred, ' '.join([str(x) for x in probs])) 35 | f_ret.write(result_line) 36 | -------------------------------------------------------------------------------- /chap10/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "food_resnet_10_cvgj_finetune_val.prototxt" 2 | test_iter: 300 3 | test_interval: 1000 4 | base_lr: 0.1 5 | display: 100 6 | max_iter: 20000 7 | lr_policy: "fixed" 8 | momentum: 0.9 9 | delta: 1e-8 10 | weight_decay: 0.0001 11 | snapshot: 10000 12 | snapshot_prefix: "food_resnet-10" 13 | test_initialization: false 14 | solver_mode: GPU 15 | type: "AdaDelta" 16 | -------------------------------------------------------------------------------- /chap10/sort_kaoya_by_pred_prob.py: -------------------------------------------------------------------------------- 1 | from operator import itemgetter 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.metrics import precision_recall_curve, average_precision_score 5 | 6 | result_filepath = 'val_results.txt' 7 | 8 | ky_probs = [] 9 | with open(result_filepath, 'r') as f: 10 | lines = f.readlines() 11 | for line in lines: 12 | tokens = line.split() 13 | true_label = int(tokens[1]) 14 | is_ky = 1 if true_label == 0 else 0 15 | ky_prob = float(tokens[3]) 16 | ky_probs.append([is_ky, ky_prob]) 17 | 18 | ky_probs_sorted = np.array(sorted(ky_probs, key=itemgetter(1), reverse=True)) 19 | for is_ky, ky_prob in ky_probs_sorted: 20 | print('{:.0f} {:.6f}'.format(is_ky, ky_prob)) 21 | 22 | labels = ky_probs_sorted[:, 0] 23 | probs = ky_probs_sorted[:, 1] 24 | 25 | precision, recall, ths = precision_recall_curve(labels, probs) 26 | ap = average_precision_score(labels, probs) 27 | 28 | plt.figure('Kao Ya Precision-Recall Curve') 29 | plt.plot(recall, precision, 'k', lw=2, label='Kao Ya') 30 | plt.xlabel('Recall', fontsize=16) 31 | plt.ylabel('Precision', fontsize=16) 32 | plt.ylim([0.0, 1.05]) 33 | plt.xlim([0.0, 1.0]) 34 | plt.title('Precision-Recall Curve: Average Precision={:.4f}'.format(ap)) 35 | plt.legend(loc="lower left") 36 | plt.show() 37 | 38 | -------------------------------------------------------------------------------- /chap10/visualize_activation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import cv2 5 | sys.path.append('/path/to/caffe/python') 6 | import caffe 7 | 8 | WEIGHTS_FILE = 'food_resnet-10_iter_10000.caffemodel' 9 | DEPLOY_FILE = 'food_resnet_10_cvgj_deploy.prototxt' 10 | FEATURE_MAPS = 'layer_512_1_sum' 11 | FC_LAYER = 'fc_food' 12 | 13 | #caffe.set_mode_cpu() 14 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST) 15 | 16 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) 17 | transformer.set_transpose('data', (2, 0, 1)) 18 | transformer.set_raw_scale('data', 255) 19 | transformer.set_channel_swap('data', (2, 1, 0)) 20 | 21 | image_list = sys.argv[1] 22 | 23 | cmap = plt.get_cmap('jet') 24 | with open(image_list, 'r') as f: 25 | for line in f.readlines(): 26 | filepath = line.split()[0] 27 | image = caffe.io.load_image(filepath) 28 | # uncomment the following 2 lines to forward with 29 | # original image size and corresponding activation 
maps
30 |         #transformer.inputs['data'] = (1, 3, image.shape[0], image.shape[1])
31 |         #net.blobs['data'].reshape(1, 3, image.shape[0], image.shape[1])
32 |         transformed_image = transformer.preprocess('data', image)
33 |         net.blobs['data'].data[...] = transformed_image
34 | 
35 |         output = net.forward()
36 |         pred = np.argmax(output['prob'][0])
37 | 
38 |         feature_maps = net.blobs[FEATURE_MAPS].data[0]
39 |         fc_params = net.params[FC_LAYER]
40 |         fc_w = fc_params[0].data[pred]
41 |         #fc_b = fc_params[1].data[pred]
42 | 
43 |         activation_map = np.zeros_like(feature_maps[0])
44 |         for feature_map, w in zip(feature_maps, fc_w):
45 |             activation_map += feature_map * w
46 |         #activation_map += fc_b
47 | 
48 |         # Visualize as
49 |         # left: original image
50 |         # middle: activation map
51 |         # right: original image overlaid with activation map in 'jet' colormap
52 |         image = np.round(image*255).astype(np.uint8)
53 |         h, w = image.shape[:2]
54 |         activation_map = cv2.resize(activation_map, (w, h), interpolation=cv2.INTER_CUBIC)
55 |         activation_map -= activation_map.min()
56 |         activation_map /= activation_map.max()
57 |         activation_color_map = np.round(cmap(activation_map)[:, :, :3]*255).astype(np.uint8)
58 |         activation_map = np.stack(np.round([activation_map*255]*3).astype(np.uint8))
59 |         activation_map = activation_map.transpose(1, 2, 0)
60 |         overlay_img = image/2 + activation_color_map/2
61 |         vis_img = np.hstack([image, activation_map, overlay_img])
62 |         vis_img = cv2.cvtColor(vis_img, cv2.COLOR_RGB2BGR)
63 | 
64 |         cv2.imshow('Activation Map Visualization', vis_img)
65 |         cv2.waitKey()
66 | 
--------------------------------------------------------------------------------
/chap11/README.md:
--------------------------------------------------------------------------------
1 | # Chapter 11: Object Detection
2 | 
3 | ## Key reference links
4 | 
5 | ### 11.1.1 The Sliding Window approach
6 | Mitosis Detection in Breast Cancer Histology Images
7 | with Deep Neural Networks
8 | http://people.idsia.ch/~ciresan/data/miccai2013.pdf
9 | 
10 | 
11 | ### 11.1.3 Selective Search and R-CNN
12 | Selective Search for Object Recognition
13 | https://ivi.fnwi.uva.nl/isis/publications/2013/UijlingsIJCV2013/UijlingsIJCV2013.pdf
14 | 
15 | R-CNN
16 | http://www.cv-foundation.org/openaccess/content_cvpr_2014/papers/Girshick_Rich_Feature_Hierarchies_2014_CVPR_paper.pdf
17 | 
18 | ### 11.1.4 SPP, ROI Pooling, and Fast R-CNN
19 | 
20 | SPP
21 | https://arxiv.org/pdf/1406.4729v1.pdf
22 | 
23 | Fast R-CNN
24 | http://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Girshick_Fast_R-CNN_ICCV_2015_paper.pdf
25 | 
26 | ### 11.1.5 RPN and Faster R-CNN
27 | Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
28 | http://papers.nips.cc/paper/5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks.pdf
29 | 
30 | ### 11.1.6 YOLO/SSD
31 | You Only Look Once: Unified, Real-Time Object Detection
32 | http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Redmon_You_Only_Look_CVPR_2016_paper.pdf
33 | 
34 | SSD: Single Shot MultiBox Detector
35 | https://arxiv.org/pdf/1512.02325v5.pdf
36 | 
37 | ## Pretrained model download links
38 | vgg16_reduced.zip & ssd_300_vgg16_reduced_voc0712_trainval.zip
39 | http://pan.baidu.com/s/1sli1TE1
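40 | 
41 | All of the detectors above are matched against ground truth by intersection over union (IoU). As a quick reference (an editor's sketch, not code from the book), for boxes given as [x0, y0, x1, y1]:
42 | 
43 | ```python
44 | def iou(box_a, box_b):
45 |     # overlap rectangle (empty overlaps clamp to zero area)
46 |     x0, y0 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
47 |     x1, y1 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
48 |     inter = max(0, x1 - x0) * max(0, y1 - y0)
49 |     area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
50 |     area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
51 |     return float(inter) / (area_a + area_b - inter)
52 | ```
--------------------------------------------------------------------------------
/chap11/prepare_voc_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
3 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar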
4 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
5 | tar -xvf VOCtrainval_11-May-2012.tar
6 | tar -xvf VOCtrainval_06-Nov-2007.tar
7 | tar -xvf VOCtest_06-Nov-2007.tar
8 | 
--------------------------------------------------------------------------------
/chap12/README.md:
--------------------------------------------------------------------------------
1 | ## Metric Learning with Siamese Network
2 | ### step 1
3 | Generate the images following the MNIST data preparation in Chapter 8, then run
4 | > ln -s /path/to/mnist mnist
5 | 
6 | to link the directory that holds the MNIST images under the current directory.
7 | 
8 | ### step 2
9 | > python gen_pairwise_imglist.py
10 | 
11 | generates the paired image lists.
12 | 
13 | ### step 3
14 | > /path/to/caffe/build/tools/convert_imageset ./ train.txt train_lmdb --gray
15 | > /path/to/caffe/build/tools/convert_imageset ./ train_p.txt train_p_lmdb --gray
16 | > /path/to/caffe/build/tools/convert_imageset ./ val.txt val_lmdb --gray
17 | > /path/to/caffe/build/tools/convert_imageset ./ val_p.txt val_p_lmdb --gray
18 | 
19 | generate the lmdbs.
20 | 
21 | ### step 4
22 | > /path/to/caffe/build/tools/caffe train -solver mnist_siamese_solver.prototxt -log_dir ./
23 | 
24 | trains the model.
25 | 
26 | ### step 5
27 | 
28 | > python visualize_result.py
29 | 
30 | visualizes the results.
31 | 
32 | ## Pretrained model download links
33 | https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/mnist_siamese_iter_20000.caffemodel
34 | or
35 | http://pan.baidu.com/s/1qYk5MDQ
--------------------------------------------------------------------------------
/chap12/gen_pairwise_imglist.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import re
4 | 
5 | train_dir = 'mnist/train'
6 | val_dir = 'mnist/val'
7 | n_train = 100000
8 | n_val = 10000
9 | 
10 | pattern = re.compile('\d+_(\d)\.jpg')
11 | 
12 | for img_dir, n_pairs in zip([train_dir, val_dir], [n_train, n_val]):
13 |     imglist = os.listdir(img_dir)
14 |     n_samples = len(imglist)
15 |     dataset = img_dir[img_dir.rfind(os.sep)+1:]
16 |     with open('{}.txt'.format(dataset), 'w') as f, \
17 |          open('{}_p.txt'.format(dataset), 'w') as f_p:
18 |         for i in range(n_pairs):
19 |             filename = imglist[random.randint(0, n_samples-1)]
20 |             digit = pattern.findall(filename)[0]
21 |             filepath = os.sep.join([img_dir, filename])
22 | 
23 |             filename_p = imglist[random.randint(0, n_samples-1)]
24 |             digit_p = pattern.findall(filename_p)[0]
25 |             filepath_p = os.sep.join([img_dir, filename_p])
26 | 
27 |             label = 1 if digit == digit_p else 0
28 | 
29 |             f.write('{} {}\n'.format(filepath, label))
30 |             f_p.write('{} {}\n'.format(filepath_p, label))
--------------------------------------------------------------------------------
/chap12/mnist_siamese.prototxt:
--------------------------------------------------------------------------------
1 | name: "mnist_siamese"
2 | layer {
3 |   name: "data"
4 |   type: "Input"
5 |   top: "data"
6 |   input_param {
7 |     shape: { dim: 10000 dim: 1 dim: 28 dim: 28 }
8 |   }
9 | }
10 | layer {
11 |   name: "conv1"
12 |   type: "Convolution"
13 |   bottom: "data"
14 |   top: "conv1"
15 |   param {
16 |     lr_mult: 1
17 |   }
18 |   param {
19 |     lr_mult: 2
20 |   }
21 |   convolution_param {
22 |     num_output: 20
23 |     kernel_size: 5
24 |     stride: 1
25 |   }
26 | }
27 | layer {
28 |   name: "pool1"
29 |   type: "Pooling"
30 |   bottom: "conv1"
31 |   top: "pool1"
32 |   pooling_param {
33 |     pool: MAX
34 |     kernel_size: 2
35 |     stride: 2
36 |   }
37 | }
38 | layer {
39 |   name: "conv2"
40 |   type: "Convolution"
41 |   bottom: "pool1"
42 |   top: "conv2"
43 |   param {
44 | 
lr_mult: 1 45 | } 46 | param { 47 | lr_mult: 2 48 | } 49 | convolution_param { 50 | num_output: 50 51 | kernel_size: 5 52 | stride: 1 53 | } 54 | } 55 | layer { 56 | name: "pool2" 57 | type: "Pooling" 58 | bottom: "conv2" 59 | top: "pool2" 60 | pooling_param { 61 | pool: MAX 62 | kernel_size: 2 63 | stride: 2 64 | } 65 | } 66 | layer { 67 | name: "ip1" 68 | type: "InnerProduct" 69 | bottom: "pool2" 70 | top: "ip1" 71 | param { 72 | lr_mult: 1 73 | } 74 | param { 75 | lr_mult: 2 76 | } 77 | inner_product_param { 78 | num_output: 500 79 | } 80 | } 81 | layer { 82 | name: "relu1" 83 | type: "ReLU" 84 | bottom: "ip1" 85 | top: "ip1" 86 | } 87 | layer { 88 | name: "ip2" 89 | type: "InnerProduct" 90 | bottom: "ip1" 91 | top: "ip2" 92 | param { 93 | lr_mult: 1 94 | } 95 | param { 96 | lr_mult: 2 97 | } 98 | inner_product_param { 99 | num_output: 10 100 | } 101 | } 102 | layer { 103 | name: "feat" 104 | type: "InnerProduct" 105 | bottom: "ip2" 106 | top: "feat" 107 | param { 108 | lr_mult: 1 109 | } 110 | param { 111 | lr_mult: 2 112 | } 113 | inner_product_param { 114 | num_output: 2 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /chap12/mnist_siamese_solver.prototxt: -------------------------------------------------------------------------------- 1 | # The train/test net protocol buffer definition 2 | net: "mnist_siamese_train_val.prototxt" 3 | # test_iter specifies how many forward passes the test should carry out. 4 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 5 | # covering the full 10,000 testing images. 6 | test_iter: 100 7 | # Carry out testing every 500 training iterations. 8 | test_interval: 500 9 | # The base learning rate, momentum and the weight decay of the network. 
10 | base_lr: 0.01 11 | momentum: 0.9 12 | weight_decay: 0.0000 13 | # The learning rate policy 14 | lr_policy: "inv" 15 | gamma: 0.0001 16 | power: 0.75 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 50000 21 | # snapshot intermediate results 22 | snapshot: 5000 23 | snapshot_prefix: "mnist_siamese" 24 | # solver mode: CPU or GPU 25 | solver_mode: GPU 26 | -------------------------------------------------------------------------------- /chap12/mnist_siamese_train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "mnist_siamese_train_test" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | data_param { 15 | source: "train_lmdb" 16 | batch_size: 64 17 | backend: LMDB 18 | } 19 | } 20 | layer { 21 | name: "mnist_p" 22 | type: "Data" 23 | top: "data_p" 24 | include { 25 | phase: TRAIN 26 | } 27 | transform_param { 28 | mean_value: 128 29 | scale: 0.00390625 30 | } 31 | data_param { 32 | source: "train_p_lmdb" 33 | batch_size: 64 34 | backend: LMDB 35 | } 36 | } 37 | layer { 38 | name: "mnist" 39 | type: "Data" 40 | top: "data" 41 | top: "label" 42 | include { 43 | phase: TEST 44 | } 45 | transform_param { 46 | mean_value: 128 47 | scale: 0.00390625 48 | } 49 | data_param { 50 | source: "val_lmdb" 51 | batch_size: 100 52 | backend: LMDB 53 | } 54 | } 55 | layer { 56 | name: "mnist_p" 57 | type: "Data" 58 | top: "data_p" 59 | include { 60 | phase: TEST 61 | } 62 | transform_param { 63 | mean_value: 128 64 | scale: 0.00390625 65 | } 66 | data_param { 67 | source: "val_p_lmdb" 68 | batch_size: 100 69 | backend: LMDB 70 | } 71 | } 72 | layer { 73 | name: "conv1" 74 | type: "Convolution" 75 | bottom: "data" 76 | top: "conv1" 77 | param { 78 | name: "conv1_w" 79 | lr_mult: 1 80 | } 81 | param { 82 | name: "conv1_b" 83 | lr_mult: 2 84 | } 85 | convolution_param { 86 | num_output: 20 87 | kernel_size: 5 88 | stride: 1 89 | weight_filler { 90 | type: "xavier" 91 | } 92 | bias_filler { 93 | type: "constant" 94 | } 95 | } 96 | } 97 | layer { 98 | name: "pool1" 99 | type: "Pooling" 100 | bottom: "conv1" 101 | top: "pool1" 102 | pooling_param { 103 | pool: MAX 104 | kernel_size: 2 105 | stride: 2 106 | } 107 | } 108 | layer { 109 | name: "conv2" 110 | type: "Convolution" 111 | bottom: "pool1" 112 | top: "conv2" 113 | param { 114 | name: "conv2_w" 115 | lr_mult: 1 116 | } 117 | param { 118 | name: "conv2_b" 119 | lr_mult: 2 120 | } 121 | convolution_param { 122 | num_output: 50 123 | kernel_size: 5 124 | stride: 1 125 | weight_filler { 126 | type: "xavier" 127 | } 128 | bias_filler { 129 | type: "constant" 130 | } 131 | } 132 | } 133 | layer { 134 | name: "pool2" 135 | type: "Pooling" 136 | bottom: "conv2" 137 | top: "pool2" 138 | pooling_param { 139 | pool: MAX 140 | kernel_size: 2 141 | stride: 2 142 | } 143 | } 144 | layer { 145 | name: "ip1" 146 | type: "InnerProduct" 147 | bottom: "pool2" 148 | top: "ip1" 149 | param { 150 | name: "ip1_w" 151 | lr_mult: 1 152 | } 153 | param { 154 | name: "ip1_b" 155 | lr_mult: 2 156 | } 157 | inner_product_param { 158 | num_output: 500 159 | weight_filler { 160 | type: "xavier" 161 | } 162 | bias_filler { 163 | type: "constant" 164 | } 165 | } 166 | } 167 | layer { 168 | name: "relu1" 169 | type: "ReLU" 170 | bottom: "ip1" 171 | top: "ip1" 172 | } 173 | layer { 174 | name: "ip2" 175 | type: "InnerProduct" 176 | 
bottom: "ip1" 177 | top: "ip2" 178 | param { 179 | name: "ip2_w" 180 | lr_mult: 1 181 | } 182 | param { 183 | name: "ip2_b" 184 | lr_mult: 2 185 | } 186 | inner_product_param { 187 | num_output: 10 188 | weight_filler { 189 | type: "xavier" 190 | } 191 | bias_filler { 192 | type: "constant" 193 | } 194 | } 195 | } 196 | layer { 197 | name: "feat" 198 | type: "InnerProduct" 199 | bottom: "ip2" 200 | top: "feat" 201 | param { 202 | name: "feat_w" 203 | lr_mult: 1 204 | } 205 | param { 206 | name: "feat_b" 207 | lr_mult: 2 208 | } 209 | inner_product_param { 210 | num_output: 2 211 | weight_filler { 212 | type: "xavier" 213 | } 214 | bias_filler { 215 | type: "constant" 216 | } 217 | } 218 | } 219 | layer { 220 | name: "conv1_p" 221 | type: "Convolution" 222 | bottom: "data_p" 223 | top: "conv1_p" 224 | param { 225 | name: "conv1_w" 226 | lr_mult: 1 227 | } 228 | param { 229 | name: "conv1_b" 230 | lr_mult: 2 231 | } 232 | convolution_param { 233 | num_output: 20 234 | kernel_size: 5 235 | stride: 1 236 | weight_filler { 237 | type: "xavier" 238 | } 239 | bias_filler { 240 | type: "constant" 241 | } 242 | } 243 | } 244 | layer { 245 | name: "pool1_p" 246 | type: "Pooling" 247 | bottom: "conv1_p" 248 | top: "pool1_p" 249 | pooling_param { 250 | pool: MAX 251 | kernel_size: 2 252 | stride: 2 253 | } 254 | } 255 | layer { 256 | name: "conv2_p" 257 | type: "Convolution" 258 | bottom: "pool1_p" 259 | top: "conv2_p" 260 | param { 261 | name: "conv2_w" 262 | lr_mult: 1 263 | } 264 | param { 265 | name: "conv2_b" 266 | lr_mult: 2 267 | } 268 | convolution_param { 269 | num_output: 50 270 | kernel_size: 5 271 | stride: 1 272 | weight_filler { 273 | type: "xavier" 274 | } 275 | bias_filler { 276 | type: "constant" 277 | } 278 | } 279 | } 280 | layer { 281 | name: "pool2_p" 282 | type: "Pooling" 283 | bottom: "conv2_p" 284 | top: "pool2_p" 285 | pooling_param { 286 | pool: MAX 287 | kernel_size: 2 288 | stride: 2 289 | } 290 | } 291 | layer { 292 | name: "ip1_p" 293 | type: "InnerProduct" 294 | bottom: "pool2_p" 295 | top: "ip1_p" 296 | param { 297 | name: "ip1_w" 298 | lr_mult: 1 299 | } 300 | param { 301 | name: "ip1_b" 302 | lr_mult: 2 303 | } 304 | inner_product_param { 305 | num_output: 500 306 | weight_filler { 307 | type: "xavier" 308 | } 309 | bias_filler { 310 | type: "constant" 311 | } 312 | } 313 | } 314 | layer { 315 | name: "relu1_p" 316 | type: "ReLU" 317 | bottom: "ip1_p" 318 | top: "ip1_p" 319 | } 320 | layer { 321 | name: "ip2_p" 322 | type: "InnerProduct" 323 | bottom: "ip1_p" 324 | top: "ip2_p" 325 | param { 326 | name: "ip2_w" 327 | lr_mult: 1 328 | } 329 | param { 330 | name: "ip2_b" 331 | lr_mult: 2 332 | } 333 | inner_product_param { 334 | num_output: 10 335 | weight_filler { 336 | type: "xavier" 337 | } 338 | bias_filler { 339 | type: "constant" 340 | } 341 | } 342 | } 343 | layer { 344 | name: "feat_p" 345 | type: "InnerProduct" 346 | bottom: "ip2_p" 347 | top: "feat_p" 348 | param { 349 | name: "feat_w" 350 | lr_mult: 1 351 | } 352 | param { 353 | name: "feat_b" 354 | lr_mult: 2 355 | } 356 | inner_product_param { 357 | num_output: 2 358 | weight_filler { 359 | type: "xavier" 360 | } 361 | bias_filler { 362 | type: "constant" 363 | } 364 | } 365 | } 366 | layer { 367 | name: "loss" 368 | type: "ContrastiveLoss" 369 | bottom: "feat" 370 | bottom: "feat_p" 371 | bottom: "label" 372 | top: "loss" 373 | contrastive_loss_param { 374 | margin: 1 375 | } 376 | } 377 | -------------------------------------------------------------------------------- /chap12/visualize_result.py: 
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append('/path/to/caffe/python')
4 | import re
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | from sklearn.manifold import TSNE
8 | import cv2
9 | import caffe
10 | 
11 | WEIGHTS_FILE = 'mnist_siamese_iter_20000.caffemodel'
12 | DEPLOY_FILE = 'mnist_siamese.prototxt'
13 | IMG_DIR = 'mnist/test'
14 | MEAN = 128
15 | SCALE = 0.00390625
16 | 
17 | caffe.set_mode_gpu()
18 | caffe.set_device(0)
19 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST)
20 | 
21 | pattern = re.compile('\d+_(\d)\.jpg')
22 | 
23 | image_list = os.listdir(IMG_DIR)
24 | n_imgs = len(image_list)
25 | 
26 | net.blobs['data'].reshape(n_imgs, 1, 28, 28)
27 | 
28 | labels = []
29 | for i, filename in enumerate(image_list):
30 |     digit = int(pattern.findall(filename)[0])
31 |     labels.append(digit)
32 |     filepath = os.sep.join([IMG_DIR, filename])
33 |     image = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE).astype(np.float) - MEAN
34 |     image *= SCALE
35 |     net.blobs['data'].data[i, ...] = image
36 | 
37 | labels = np.array(labels)
38 | 
39 | output = net.forward()
40 | feat = output['feat']
41 | 
42 | colors = ['#ff0000', '#ffff00', '#00ff00', '#00ffff', '#0000ff',
43 |           '#ff00ff', '#990000', '#999900', '#009900', '#009999']
44 | legend = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
45 | 
46 | plt.figure('feat')
47 | for i in range(10):
48 |     plt.plot(feat[labels==i,0].flatten(),
49 |              feat[labels==i,1].flatten(),
50 |              '.', c=colors[i])
51 | plt.legend(legend)
52 | 
53 | plt.figure('ip2')
54 | ip2_feat = net.blobs['ip2'].data
55 | model = TSNE(n_components=2)
56 | 
57 | ip2_vis_feat = model.fit_transform(ip2_feat)
58 | for i in range(10):
59 |     plt.plot(ip2_vis_feat[labels==i,0].flatten(),
60 |              ip2_vis_feat[labels==i,1].flatten(),
61 |              '.', c=colors[i])
62 | plt.legend(legend)
63 | 
64 | plt.show()
65 | 
--------------------------------------------------------------------------------
/chap5/README.md:
--------------------------------------------------------------------------------
1 | ## Chapter 5 sample code
2 | 
3 | See Chapter 5 of the book, or the online versions:
4 | https://zhuanlan.zhihu.com/p/24162430
5 | https://zhuanlan.zhihu.com/p/24309547
6 | 
7 | 
--------------------------------------------------------------------------------
/chap5/bar_n_pie_chart.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib as mpl
3 | import matplotlib.pyplot as plt
4 | 
5 | mpl.rcParams['axes.titlesize'] = 20
6 | mpl.rcParams['xtick.labelsize'] = 16
7 | mpl.rcParams['ytick.labelsize'] = 16
8 | mpl.rcParams['axes.labelsize'] = 16
9 | mpl.rcParams['xtick.major.size'] = 0
10 | mpl.rcParams['ytick.major.size'] = 0
11 | 
12 | speed_map = {
13 |     'dog': (48, '#7199cf'),
14 |     'cat': (45, '#4fc4aa'),
15 |     'cheetah': (120, '#e1a7a2')
16 | }
17 | 
18 | fig = plt.figure('Bar chart & Pie chart')
19 | 
20 | ax = fig.add_subplot(121)
21 | ax.set_title('Running speed - bar chart')
22 | 
23 | xticks = np.arange(3)
24 | 
25 | bar_width = 0.5
26 | 
27 | animals = speed_map.keys()
28 | speeds = [x[0] for x in speed_map.values()]
29 | colors = [x[1] for x in speed_map.values()]
30 | bars = ax.bar(xticks, speeds, width=bar_width, edgecolor='none')
31 | 
32 | ax.set_ylabel('Speed(km/h)')
33 | ax.set_xticks(xticks+bar_width/2)
34 | ax.set_xticklabels(animals)
35 | ax.set_xlim([bar_width/2-0.5, 3-bar_width/2])
36 | ax.set_ylim([0, 125])
37 | 
38 | for bar, color in zip(bars, colors):
39 |     bar.set_color(color)
40 | 
41 | ax =
fig.add_subplot(122) 42 | ax.set_title('Running speed - pie chart') 43 | 44 | labels = ['{}\n{} km/h'.format(a, s) for a, s in zip(animals, speeds)] 45 | 46 | ax.pie(speeds, labels=labels, colors=colors) 47 | 48 | plt.axis('equal') 49 | plt.show() 50 | -------------------------------------------------------------------------------- /chap5/fit_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib as mpl 3 | import matplotlib.pyplot as plt 4 | 5 | mpl.rcParams['xtick.labelsize'] = 24 6 | mpl.rcParams['ytick.labelsize'] = 24 7 | 8 | np.random.seed(42) 9 | 10 | x = np.linspace(0, 5, 100) 11 | y = 2*np.sin(x) + 0.3*x**2 12 | y_data = y + np.random.normal(scale=0.3, size=100) 13 | 14 | plt.figure('data') 15 | plt.plot(x, y_data, '.') 16 | 17 | plt.figure('model') 18 | plt.plot(x, y) 19 | 20 | plt.figure('data & model') 21 | plt.plot(x, y, 'k', lw=3) 22 | plt.scatter(x, y_data) 23 | 24 | plt.show() 25 | -------------------------------------------------------------------------------- /chap5/scatter_3d.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from mpl_toolkits.mplot3d import Axes3D 5 | 6 | np.random.seed(42) 7 | 8 | n_samples = 500 9 | dim = 3 10 | 11 | samples = np.random.multivariate_normal( 12 | np.zeros(dim), 13 | np.eye(dim), 14 | n_samples 15 | ) 16 | 17 | for i in range(samples.shape[0]): 18 | r = np.power(np.random.random(), 1.0/3.0) 19 | samples[i] *= r / np.linalg.norm(samples[i]) 20 | 21 | upper_samples = [] 22 | lower_samples = [] 23 | for x, y, z in samples: 24 | if z > 3*x + 2*y - 1: 25 | upper_samples.append((x, y, z)) 26 | else: 27 | lower_samples.append((x, y, z)) 28 | 29 | fig = plt.figure('3D scatter plot') 30 | ax = fig.add_subplot(111, projection='3d') 31 | 32 | uppers = np.array(upper_samples) 33 | lowers = np.array(lower_samples) 34 | 35 | ax.scatter(uppers[:, 0], uppers[:, 1], uppers[:, 2], c='r', marker='o') 36 | ax.scatter(lowers[:, 0], lowers[:, 1], lowers[:, 2], c='g', marker='^') 37 | 38 | plt.show() 39 | -------------------------------------------------------------------------------- /chap5/surface_3d.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from mpl_toolkits.mplot3d import Axes3D 5 | 6 | np.random.seed(42) 7 | 8 | n_grids = 51 9 | c = n_grids / 2 10 | nf = 2 11 | 12 | x = np.linspace(0, 1, n_grids) 13 | y = np.linspace(0, 1, n_grids) 14 | X, Y = np.meshgrid(x, y) 15 | 16 | spectrum = np.zeros((n_grids, n_grids), dtype=np.complex) 17 | noise = [np.complex(x, y) for x, y in np.random.uniform(-1,1,((2*nf+1)**2/2, 2))] 18 | noisy_block = np.concatenate((noise, [0j], np.conjugate(noise[::-1]))) 19 | 20 | spectrum[c-nf:c+nf+1, c-nf:c+nf+1] = noisy_block.reshape((2*nf+1, 2*nf+1)) 21 | Z = np.real(np.fft.ifft2(np.fft.ifftshift(spectrum))) 22 | 23 | fig = plt.figure('3D surface & wire') 24 | 25 | ax = fig.add_subplot(1, 2, 1, projection='3d') 26 | ax.plot_surface(X, Y, Z, alpha=0.7, cmap='jet', rstride=1, cstride=1, lw=0) 27 | 28 | ax = fig.add_subplot(1, 2, 2, projection='3d') 29 | ax.plot_wireframe(X, Y, Z, rstride=3, cstride=3, lw=0.5) 30 | 31 | plt.show() 32 | -------------------------------------------------------------------------------- /chap5/three_doors.py: -------------------------------------------------------------------------------- 1 | import numpy.random as random 
2 | 
3 | random.seed(42)
4 | 
5 | n_tests = 10000
6 | 
7 | winning_doors = random.randint(0, 3, n_tests)
8 | change_mind_wins = 0
9 | insist_wins = 0
10 | 
11 | for winning_door in winning_doors:
12 | 
13 |     first_try = random.randint(0, 3)
14 |     remaining_choices = [i for i in range(3) if i != first_try]
15 |     wrong_choices = [i for i in range(3) if i != winning_door]
16 | 
17 |     if first_try in wrong_choices:
18 |         wrong_choices.remove(first_try)
19 | 
20 |     screened_out = random.choice(wrong_choices)
21 |     remaining_choices.remove(screened_out)
22 | 
23 |     changed_mind_try = remaining_choices[0]
24 | 
25 |     change_mind_wins += 1 if changed_mind_try == winning_door else 0
26 |     insist_wins += 1 if first_try == winning_door else 0
27 | 
28 | print(
29 |     'You win {1} out of {0} tests if you changed your mind\n'
30 |     'You win {2} out of {0} tests if you insist on the initial choice'.format(
31 |         n_tests, change_mind_wins, insist_wins
32 |     )
33 | )
34 | 
--------------------------------------------------------------------------------
/chap6/README.md:
--------------------------------------------------------------------------------
1 | ## Chapter 6 sample code
2 | Contains a parallel data-augmentation tool and a small bounding-box annotation tool.
3 | See Chapter 6 of the book, or the online version:
4 | https://zhuanlan.zhihu.com/p/24425116
5 | 
6 | The data-augmentation tool in the book balances the per-process workload manually; for an automatically balanced version based on a Python process pool, see run_augmentation_pool_map.py
7 | 
--------------------------------------------------------------------------------
/chap6/bbox_labeling/bbox_labeling.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | from tkFileDialog import askdirectory
4 | from tkMessageBox import askyesno
5 | 
6 | WINDOW_NAME = 'Simple Bounding Box Labeling Tool'
7 | FPS = 24
8 | SUPPORTED_FORMATS = ['jpg', 'jpeg', 'png']
9 | DEFAULT_COLOR = {'Object': (255, 0, 0)}
10 | COLOR_GRAY = (192, 192, 192)
11 | BAR_HEIGHT = 16
12 | 
13 | KEY_UP = 65362
14 | KEY_DOWN = 65364
15 | KEY_LEFT = 65361
16 | KEY_RIGHT = 65363
17 | KEY_ESC = 27
18 | KEY_DELETE = 65535
19 | KEY_EMPTY = 0
20 | 
21 | get_bbox_name = '{}.bbox'.format
22 | 
23 | 
24 | class SimpleBBoxLabeling:
25 | 
26 |     def __init__(self, data_dir, fps=FPS, window_name=None):
27 |         self._data_dir = data_dir
28 |         self.fps = fps
29 |         self.window_name = window_name if window_name else WINDOW_NAME
30 | 
31 |         self._pt0 = None
32 |         self._pt1 = None
33 |         self._drawing = False
34 |         self._cur_label = None
35 |         self._bboxes = []
36 | 
37 |         label_path = '{}.labels'.format(self._data_dir)
38 |         self.label_colors = DEFAULT_COLOR if not os.path.exists(label_path) else self.load_labels(label_path)
39 | 
40 |         imagefiles = [x for x in os.listdir(self._data_dir) if x[x.rfind('.') + 1:].lower() in SUPPORTED_FORMATS]
41 |         labeled = [x for x in imagefiles if os.path.exists(os.sep.join([self._data_dir, get_bbox_name(x)]))]
42 |         to_be_labeled = [x for x in imagefiles if x not in labeled]
43 | 
44 |         self._filelist = labeled + to_be_labeled
45 |         self._index = len(labeled)
46 |         if self._index > len(self._filelist) - 1:
47 |             self._index = len(self._filelist) - 1
48 | 
49 |     def _mouse_ops(self, event, x, y, flags, param):
50 | 
51 |         if event == cv2.EVENT_LBUTTONDOWN:
52 |             self._drawing = True
53 |             self._pt0 = (x, y)
54 | 
55 |         elif event == cv2.EVENT_LBUTTONUP:
56 |             self._drawing = False
57 |             self._pt1 = (x, y)
58 |             self._bboxes.append((self._cur_label, (self._pt0, self._pt1)))
59 | 
60 |         elif event == cv2.EVENT_MOUSEMOVE:
61 |             self._pt1 = (x, y)
62 | 
63 |         elif event == cv2.EVENT_RBUTTONUP:
64 |             if self._bboxes:
65 |                 self._bboxes.pop()
66 | 
67 |     def _clean_bbox(self):
68 |         self._pt0 = None
69 |         self._pt1 = None
70 |         self._drawing =
False 71 | self._bboxes = [] 72 | 73 | def _draw_bbox(self, img): 74 | 75 | h, w = img.shape[:2] 76 | canvas = cv2.copyMakeBorder(img, 0, BAR_HEIGHT, 0, 0, cv2.BORDER_CONSTANT, value=COLOR_GRAY) 77 | 78 | label_msg = '{}: {}, {}'.format(self._cur_label, self._pt0, self._pt1) \ 79 | if self._drawing \ 80 | else 'Current label: {}'.format(self._cur_label) 81 | msg = '{}/{}: {} | {}'.format(self._index + 1, len(self._filelist), self._filelist[self._index], label_msg) 82 | 83 | cv2.putText(canvas, msg, (1, h+12), 84 | cv2.FONT_HERSHEY_SIMPLEX, 85 | 0.5, (0, 0, 0), 1) 86 | for label, (bpt0, bpt1) in self._bboxes: 87 | label_color = self.label_colors[label] if label in self.label_colors else COLOR_GRAY 88 | cv2.rectangle(canvas, bpt0, bpt1, label_color, thickness=2) 89 | cv2.putText(canvas, label, (bpt0[0]+3, bpt0[1]+15), 90 | cv2.FONT_HERSHEY_SIMPLEX, 91 | 0.5, label_color, 2) 92 | if self._drawing: 93 | label_color = self.label_colors[self._cur_label] if self._cur_label in self.label_colors else COLOR_GRAY 94 | if self._pt1[0] >= self._pt0[0] and self._pt1[1] >= self._pt0[1]: 95 | cv2.rectangle(canvas, self._pt0, self._pt1, label_color, thickness=2) 96 | cv2.putText(canvas, self._cur_label, (self._pt0[0] + 3, self._pt0[1] + 15), 97 | cv2.FONT_HERSHEY_SIMPLEX, 98 | 0.5, label_color, 2) 99 | return canvas 100 | 101 | @staticmethod 102 | def export_bbox(filepath, bboxes): 103 | if bboxes: 104 | with open(filepath, 'w') as f: 105 | for bbox in bboxes: 106 | line = repr(bbox) + '\n' 107 | f.write(line) 108 | elif os.path.exists(filepath): 109 | os.remove(filepath) 110 | 111 | @staticmethod 112 | def load_bbox(filepath): 113 | bboxes = [] 114 | with open(filepath, 'r') as f: 115 | line = f.readline().rstrip() 116 | while line: 117 | bboxes.append(eval(line)) 118 | line = f.readline().rstrip() 119 | return bboxes 120 | 121 | @staticmethod 122 | def load_labels(filepath): 123 | label_colors = {} 124 | with open(filepath, 'r') as f: 125 | line = f.readline().rstrip() 126 | while line: 127 | label, color = eval(line) 128 | label_colors[label] = color 129 | line = f.readline().rstrip() 130 | return label_colors 131 | 132 | @staticmethod 133 | def load_sample(filepath): 134 | img = cv2.imread(filepath) 135 | bbox_filepath = get_bbox_name(filepath) 136 | bboxes = [] 137 | if os.path.exists(bbox_filepath): 138 | bboxes = SimpleBBoxLabeling.load_bbox(bbox_filepath) 139 | return img, bboxes 140 | 141 | def _export_n_clean_bbox(self): 142 | bbox_filepath = os.sep.join([self._data_dir, get_bbox_name(self._filelist[self._index])]) 143 | self.export_bbox(bbox_filepath, self._bboxes) 144 | self._clean_bbox() 145 | 146 | def _delete_current_sample(self): 147 | filename = self._filelist[self._index] 148 | filepath = os.sep.join([self._data_dir, filename]) 149 | if os.path.exists(filepath): 150 | os.remove(filepath) 151 | filepath = get_bbox_name(filepath) 152 | if os.path.exists(filepath): 153 | os.remove(filepath) 154 | self._filelist.pop(self._index) 155 | print('{} is deleted!'.format(filename)) 156 | 157 | def start(self): 158 | 159 | last_filename = '' 160 | label_index = 0 161 | labels = self.label_colors.keys() 162 | n_labels = len(labels) 163 | 164 | cv2.namedWindow(self.window_name) 165 | cv2.setMouseCallback(self.window_name, self._mouse_ops) 166 | key = KEY_EMPTY 167 | delay = int(1000 / FPS) 168 | 169 | while key != KEY_ESC: 170 | 171 | if key == KEY_UP: 172 | if label_index == 0: 173 | pass 174 | else: 175 | label_index -= 1 176 | 177 | elif key == KEY_DOWN: 178 | if label_index == n_labels - 1: 179 | 
pass 180 | else: 181 | label_index += 1 182 | 183 | elif key == KEY_LEFT: 184 | if self._index > 0: 185 | self._export_n_clean_bbox() 186 | 187 | self._index -= 1 188 | if self._index < 0: 189 | self._index = 0 190 | 191 | elif key == KEY_RIGHT: 192 | if self._index < len(self._filelist) - 1: 193 | self._export_n_clean_bbox() 194 | 195 | self._index += 1 196 | if self._index > len(self._filelist) - 1: 197 | self._index = len(self._filelist) - 1 198 | 199 | elif key == KEY_DELETE: 200 | if askyesno('Delete Sample', 'Are you sure?'): 201 | self._delete_current_sample() 202 | key = KEY_EMPTY 203 | continue 204 | 205 | filename = self._filelist[self._index] 206 | if filename != last_filename: 207 | filepath = os.sep.join([self._data_dir, filename]) 208 | img, self._bboxes = self.load_sample(filepath) 209 | 210 | self._cur_label = labels[label_index] 211 | 212 | canvas = self._draw_bbox(img) 213 | cv2.imshow(self.window_name, canvas) 214 | key = cv2.waitKey(delay) 215 | 216 | last_filename = filename 217 | 218 | print('Finished!') 219 | 220 | cv2.destroyAllWindows() 221 | self.export_bbox(os.sep.join([self._data_dir, get_bbox_name(filename)]), self._bboxes) 222 | 223 | print('Labels updated!') 224 | 225 | if __name__ == '__main__': 226 | dir_with_images = askdirectory(title='Where are the images?') 227 | labeling_task = SimpleBBoxLabeling(dir_with_images) 228 | labeling_task.start() 229 | -------------------------------------------------------------------------------- /chap6/bbox_labeling/detection_anno_bbox2voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import xml.etree.ElementTree as ET 4 | #import xml.dom.minidom as minidom 5 | import cv2 6 | from bbox_labeling import SimpleBBoxLabeling 7 | 8 | input_dir = sys.argv[1].rstrip(os.sep) 9 | 10 | bbox_filenames = [x for x in os.listdir(input_dir) if x.endswith('.bbox')] 11 | 12 | for bbox_filename in bbox_filenames: 13 | bbox_filepath = os.sep.join([input_dir, bbox_filename]) 14 | jpg_filepath = bbox_filepath[:-5] 15 | if not os.path.exists(jpg_filepath): 16 | print('Something is wrong with {}!'.format(bbox_filepath)) 17 | break 18 | 19 | root = ET.Element('annotation') 20 | 21 | filename = ET.SubElement(root, 'filename') 22 | jpg_filename = jpg_filepath.split(os.sep)[-1] 23 | filename.text = jpg_filename 24 | 25 | img = cv2.imread(jpg_filepath) 26 | h, w, c = img.shape 27 | size = ET.SubElement(root, 'size') 28 | width = ET.SubElement(size, 'width') 29 | width.text = str(w) 30 | height = ET.SubElement(size, 'height') 31 | height.text = str(h) 32 | depth = ET.SubElement(size, 'depth') 33 | depth.text = str(c) 34 | 35 | bboxes = SimpleBBoxLabeling.load_bbox(bbox_filepath) 36 | for obj_name, coord in bboxes: 37 | obj = ET.SubElement(root, 'object') 38 | name = ET.SubElement(obj, 'name') 39 | name.text = obj_name 40 | bndbox = ET.SubElement(obj, 'bndbox') 41 | xmin = ET.SubElement(bndbox, 'xmin') 42 | xmax = ET.SubElement(bndbox, 'xmax') 43 | ymin = ET.SubElement(bndbox, 'ymin') 44 | ymax = ET.SubElement(bndbox, 'ymax') 45 | (left, top), (right, bottom) = coord 46 | xmin.text = str(left) 47 | xmax.text = str(right) 48 | ymin.text = str(top) 49 | ymax.text = str(bottom) 50 | 51 | xml_filepath = jpg_filepath[:jpg_filepath.rfind('.')] + '.xml' 52 | with open(xml_filepath, 'w') as f: 53 | anno_xmlstr = ET.tostring(root) 54 | 55 | # In case a nicely formatted xml is needed 56 | # uncomment the following 2 lines and minidom import 57 | #anno_xml = minidom.parseString(anno_xmlstr) 
58 | #anno_xmlstr = anno_xml.toprettyxml() 59 | f.write(anno_xmlstr) 60 | -------------------------------------------------------------------------------- /chap6/bbox_labeling/samples.labels: -------------------------------------------------------------------------------- 1 | 'Horse', (255, 255, 0) 2 | 'Hill', (0, 255, 255) 3 | 'DiaoSi', (0, 0, 255) 4 | -------------------------------------------------------------------------------- /chap6/data_augmentation/image_augmentation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | crop_image = lambda img, x0, y0, w, h: img[y0:y0+h, x0:x0+w] 5 | 6 | def random_crop(img, area_ratio, hw_vari): 7 | h, w = img.shape[:2] 8 | hw_delta = np.random.uniform(-hw_vari, hw_vari) 9 | hw_mult = 1 + hw_delta 10 | w_crop = int(round(w*np.sqrt(area_ratio*hw_mult))) 11 | if w_crop > w - 2: 12 | w_crop = w - 2 13 | h_crop = int(round(h*np.sqrt(area_ratio/hw_mult))) 14 | if h_crop > h - 2: 15 | h_crop = h - 2 16 | x0 = np.random.randint(0, w-w_crop-1) 17 | y0 = np.random.randint(0, h-h_crop-1) 18 | return crop_image(img, x0, y0, w_crop, h_crop) 19 | 20 | def rotate_image(img, angle, crop): 21 | h, w = img.shape[:2] 22 | angle %= 360 23 | M_rotate = cv2.getRotationMatrix2D((w/2, h/2), angle, 1) 24 | img_rotated = cv2.warpAffine(img, M_rotate, (w, h)) 25 | 26 | if crop: 27 | angle_crop = angle % 180 28 | if angle_crop > 90: 29 | angle_crop = 180 - angle_crop 30 | theta = angle_crop * np.pi / 180.0 31 | hw_ratio = float(h) / float(w) 32 | tan_theta = np.tan(theta) 33 | numerator = np.cos(theta) + np.sin(theta) * tan_theta 34 | r = hw_ratio if h > w else 1 / hw_ratio 35 | denominator = r * tan_theta + 1 36 | crop_mult = numerator / denominator 37 | w_crop = int(round(crop_mult*w)) 38 | h_crop = int(round(crop_mult*h)) 39 | x0 = int((w-w_crop)/2) 40 | y0 = int((h-h_crop)/2) 41 | 42 | img_rotated = crop_image(img_rotated, x0, y0, w_crop, h_crop) 43 | 44 | return img_rotated 45 | 46 | def random_rotate(img, angle_vari, p_crop): 47 | angle = np.random.uniform(-angle_vari, angle_vari) 48 | crop = False if np.random.random() > p_crop else True 49 | return rotate_image(img, angle, crop) 50 | 51 | def hsv_transform(img, hue_delta, sat_mult, val_mult): 52 | img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float) 53 | img_hsv[:, :, 0] = (img_hsv[:, :, 0] + hue_delta) % 180 54 | img_hsv[:, :, 1] *= sat_mult 55 | img_hsv[:, :, 2] *= val_mult 56 | img_hsv[img_hsv > 255] = 255 57 | return cv2.cvtColor(np.round(img_hsv).astype(np.uint8), cv2.COLOR_HSV2BGR) 58 | 59 | def random_hsv_transform(img, hue_vari, sat_vari, val_vari): 60 | hue_delta = np.random.randint(-hue_vari, hue_vari) 61 | sat_mult = 1 + np.random.uniform(-sat_vari, sat_vari) 62 | val_mult = 1 + np.random.uniform(-val_vari, val_vari) 63 | return hsv_transform(img, hue_delta, sat_mult, val_mult) 64 | 65 | def gamma_transform(img, gamma): 66 | gamma_table = [np.power(x / 255.0, gamma) * 255.0 for x in range(256)] 67 | gamma_table = np.round(np.array(gamma_table)).astype(np.uint8) 68 | return cv2.LUT(img, gamma_table) 69 | 70 | def random_gamma_transform(img, gamma_vari): 71 | log_gamma_vari = np.log(gamma_vari) 72 | alpha = np.random.uniform(-log_gamma_vari, log_gamma_vari) 73 | gamma = np.exp(alpha) 74 | return gamma_transform(img, gamma) 75 | 76 | 77 | -------------------------------------------------------------------------------- /chap6/data_augmentation/run_augmentation.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import random 4 | import math 5 | from multiprocessing import Process, cpu_count 6 | 7 | import cv2 8 | 9 | import image_augmentation as ia 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='A Simple Image Data Augmentation Tool', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | 16 | parser.add_argument('input_dir', 17 | help='Directory containing images') 18 | parser.add_argument('output_dir', 19 | help='Directory for augmented images') 20 | parser.add_argument('num', 21 | help='Number of images to be augmented', 22 | type=int) 23 | 24 | parser.add_argument('--num_procs', 25 | help='Number of processes for parallel augmentation', 26 | type=int, default=cpu_count()) 27 | 28 | parser.add_argument('--p_mirror', 29 | help='Ratio to mirror an image', 30 | type=float, default=0.5) 31 | 32 | parser.add_argument('--p_crop', 33 | help='Ratio to randomly crop an image', 34 | type=float, default=1.0) 35 | parser.add_argument('--crop_size', 36 | help='The ratio of cropped image size to original image size, in area', 37 | type=float, default=0.8) 38 | parser.add_argument('--crop_hw_vari', 39 | help='Variation of h/w ratio', 40 | type=float, default=0.1) 41 | 42 | parser.add_argument('--p_rotate', 43 | help='Ratio to randomly rotate an image', 44 | type=float, default=1.0) 45 | parser.add_argument('--p_rotate_crop', 46 | help='Ratio to crop out the empty part in a rotated image', 47 | type=float, default=1.0) 48 | parser.add_argument('--rotate_angle_vari', 49 | help='Variation range of rotate angle', 50 | type=float, default=10.0) 51 | 52 | parser.add_argument('--p_hsv', 53 | help='Ratio to randomly change the HSV of an image', 54 | type=float, default=1.0) 55 | parser.add_argument('--hue_vari', 56 | help='Variation of hue', 57 | type=int, default=10) 58 | parser.add_argument('--sat_vari', 59 | help='Variation of saturation', 60 | type=float, default=0.1) 61 | parser.add_argument('--val_vari', 62 | help='Variation of value', 63 | type=float, default=0.1) 64 | 65 | parser.add_argument('--p_gamma', 66 | help='Ratio to randomly change gamma of an image', 67 | type=float, default=1.0) 68 | parser.add_argument('--gamma_vari', 69 | help='Variation of gamma', 70 | type=float, default=2.0) 71 | 72 | args = parser.parse_args() 73 | args.input_dir = args.input_dir.rstrip('/') 74 | args.output_dir = args.output_dir.rstrip('/') 75 | 76 | return args 77 | 78 | def generate_image_list(args): 79 | filenames = os.listdir(args.input_dir) 80 | num_imgs = len(filenames) 81 | 82 | num_ave_aug = int(math.floor(args.num/num_imgs)) 83 | rem = args.num - num_ave_aug*num_imgs 84 | lucky_seq = [True]*rem + [False]*(num_imgs-rem) 85 | random.shuffle(lucky_seq) 86 | 87 | img_list = [ 88 | (os.sep.join([args.input_dir, filename]), num_ave_aug+1 if lucky else num_ave_aug) 89 | for filename, lucky in zip(filenames, lucky_seq) 90 | ] 91 | 92 | random.shuffle(img_list) # in case the file sizes are not uniformly distributed 93 | 94 | length = float(num_imgs) / float(args.num_procs) 95 | indices = [int(round(i * length)) for i in range(args.num_procs + 1)] 96 | return [img_list[indices[i]:indices[i + 1]] for i in range(args.num_procs)] 97 | 98 | def augment_images(filelist, args): 99 | for filepath, n in filelist: 100 | img = cv2.imread(filepath) 101 | filename = filepath.split(os.sep)[-1] 102 | dot_pos = filename.rfind('.') 103 | imgname = filename[:dot_pos] 104 | ext =
filename[dot_pos:] 105 | 106 | print('Augmenting {} ...'.format(filename)) 107 | for i in range(n): 108 | img_varied = img.copy() 109 | varied_imgname = '{}_{:0>3d}_'.format(imgname, i) 110 | if random.random() < args.p_mirror: 111 | img_varied = cv2.flip(img_varied, 1) 112 | varied_imgname += 'm' 113 | if random.random() < args.p_crop: 114 | img_varied = ia.random_crop( 115 | img_varied, 116 | args.crop_size, 117 | args.crop_hw_vari) 118 | varied_imgname += 'c' 119 | if random.random() < args.p_rotate: 120 | img_varied = ia.random_rotate( 121 | img_varied, 122 | args.rotate_angle_vari, 123 | args.p_rotate_crop) 124 | varied_imgname += 'r' 125 | if random.random() < args.p_hsv: 126 | img_varied = ia.random_hsv_transform( 127 | img_varied, 128 | args.hue_vari, 129 | args.sat_vari, 130 | args.val_vari) 131 | varied_imgname += 'h' 132 | if random.random() < args.p_gamma: 133 | img_varied = ia.random_gamma_transform( 134 | img_varied, 135 | args.gamma_vari) 136 | varied_imgname += 'g' 137 | output_filepath = os.sep.join([ 138 | args.output_dir, 139 | '{}{}'.format(varied_imgname, ext)]) 140 | cv2.imwrite(output_filepath, img_varied) 141 | 142 | def main(): 143 | args = parse_args() 144 | params_str = str(args)[10:-1] 145 | 146 | if not os.path.exists(args.output_dir): 147 | os.mkdir(args.output_dir) 148 | 149 | print('Starting image data augmentation for {}\n' 150 | 'with\n{}\n'.format(args.input_dir, params_str)) 151 | 152 | sublists = generate_image_list(args) 153 | processes = [Process(target=augment_images, args=(x, args, )) for x in sublists] 154 | 155 | for p in processes: 156 | p.start() 157 | 158 | for p in processes: 159 | p.join() 160 | 161 | print('\nDone!') 162 | 163 | if __name__ == '__main__': 164 | main() 165 | -------------------------------------------------------------------------------- /chap6/data_augmentation/run_augmentation_pool_map.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import random 4 | import math 5 | from multiprocessing import cpu_count, Pool 6 | from functools import partial 7 | 8 | import cv2 9 | 10 | import image_augmentation as ia 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser( 14 | description='A Simple Image Data Augmentation Tool', 15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | 17 | parser.add_argument('input_dir', 18 | help='Directory containing images') 19 | parser.add_argument('output_dir', 20 | help='Directory for augmented images') 21 | parser.add_argument('num', 22 | help='Number of images to be augmented', 23 | type=int) 24 | 25 | parser.add_argument('--num_procs', 26 | help='Number of processes for parallel augmentation', 27 | type=int, default=cpu_count()) 28 | 29 | parser.add_argument('--p_mirror', 30 | help='Ratio to mirror an image', 31 | type=float, default=0.5) 32 | 33 | parser.add_argument('--p_crop', 34 | help='Ratio to randomly crop an image', 35 | type=float, default=1.0) 36 | parser.add_argument('--crop_size', 37 | help='The ratio of cropped image size to original image size, in area', 38 | type=float, default=0.8) 39 | parser.add_argument('--crop_hw_vari', 40 | help='Variation of h/w ratio', 41 | type=float, default=0.1) 42 | 43 | parser.add_argument('--p_rotate', 44 | help='Ratio to randomly rotate an image', 45 | type=float, default=1.0) 46 | parser.add_argument('--p_rotate_crop', 47 | help='Ratio to crop out the empty part in a rotated image', 48 | type=float, default=1.0) 49 |
parser.add_argument('--rotate_angle_vari', 50 | help='Variation range of rotate angle', 51 | type=float, default=10.0) 52 | 53 | parser.add_argument('--p_hsv', 54 | help='Ratio to randomly change the HSV of an image', 55 | type=float, default=1.0) 56 | parser.add_argument('--hue_vari', 57 | help='Variation of hue', 58 | type=int, default=10) 59 | parser.add_argument('--sat_vari', 60 | help='Variation of saturation', 61 | type=float, default=0.1) 62 | parser.add_argument('--val_vari', 63 | help='Variation of value', 64 | type=float, default=0.1) 65 | 66 | parser.add_argument('--p_gamma', 67 | help='Ratio to randomly change gamma of an image', 68 | type=float, default=1.0) 69 | parser.add_argument('--gamma_vari', 70 | help='Variation of gamma', 71 | type=float, default=2.0) 72 | 73 | args = parser.parse_args() 74 | args.input_dir = args.input_dir.rstrip('/') 75 | args.output_dir = args.output_dir.rstrip('/') 76 | 77 | return args 78 | 79 | def generate_image_list(args): 80 | filenames = os.listdir(args.input_dir) 81 | num_imgs = len(filenames) 82 | 83 | num_ave_aug = int(math.floor(args.num/num_imgs)) 84 | rem = args.num - num_ave_aug*num_imgs 85 | lucky_seq = [True]*rem + [False]*(num_imgs-rem) 86 | random.shuffle(lucky_seq) 87 | 88 | img_list = [ 89 | (os.sep.join([args.input_dir, filename]), num_ave_aug+1 if lucky else num_ave_aug) 90 | for filename, lucky in zip(filenames, lucky_seq) 91 | ] 92 | 93 | random.shuffle(img_list) # in case the file sizes are not uniformly distributed 94 | return img_list 95 | 96 | def augment_image(image_num_pair, args): 97 | filepath, n = image_num_pair 98 | img = cv2.imread(filepath) 99 | filename = filepath.split(os.sep)[-1] 100 | dot_pos = filename.rfind('.') 101 | imgname = filename[:dot_pos] 102 | ext = filename[dot_pos:] 103 | 104 | print('Augmenting {} ...'.format(filename)) 105 | for i in range(n): 106 | img_varied = img.copy() 107 | varied_imgname = '{}_{:0>3d}_'.format(imgname, i) 108 | if random.random() < args.p_mirror: 109 | img_varied = cv2.flip(img_varied, 1) 110 | varied_imgname += 'm' 111 | if random.random() < args.p_crop: 112 | img_varied = ia.random_crop( 113 | img_varied, 114 | args.crop_size, 115 | args.crop_hw_vari) 116 | varied_imgname += 'c' 117 | if random.random() < args.p_rotate: 118 | img_varied = ia.random_rotate( 119 | img_varied, 120 | args.rotate_angle_vari, 121 | args.p_rotate_crop) 122 | varied_imgname += 'r' 123 | if random.random() < args.p_hsv: 124 | img_varied = ia.random_hsv_transform( 125 | img_varied, 126 | args.hue_vari, 127 | args.sat_vari, 128 | args.val_vari) 129 | varied_imgname += 'h' 130 | if random.random() < args.p_gamma: 131 | img_varied = ia.random_gamma_transform( 132 | img_varied, 133 | args.gamma_vari) 134 | varied_imgname += 'g' 135 | output_filepath = os.sep.join([ 136 | args.output_dir, 137 | '{}{}'.format(varied_imgname, ext)]) 138 | cv2.imwrite(output_filepath, img_varied) 139 | 140 | def main(): 141 | args = parse_args() 142 | params_str = str(args)[10:-1] 143 | 144 | if not os.path.exists(args.output_dir): 145 | os.mkdir(args.output_dir) 146 | 147 | print('Starting image data augmentation for {}\n' 148 | 'with\n{}\n'.format(args.input_dir, params_str)) 149 | 150 | image_list = generate_image_list(args) 151 | aug_img = partial(augment_image, args=args) 152 | pool = Pool(args.num_procs) 153 | pool.map(aug_img, image_list) 154 | 155 | print('\nDone!') 156 | 157 | if __name__ == '__main__': 158 | main() 159 | --------------------------------------------------------------------------------
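Added usage note (not a file from the repo; directory names and the target count below are only illustrative). Both runners above share the same CLI, so a typical invocation looks like:
> python run_augmentation.py raw_imgs aug_imgs 10000 --rotate_angle_vari 15
> python run_augmentation_pool_map.py raw_imgs aug_imgs 10000 --num_procs 4
Each reads every image under raw_imgs, writes roughly 10000 augmented copies into aug_imgs, and encodes the applied transforms in each output file name (m/c/r/h/g for mirror/crop/rotate/HSV/gamma).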
/chap7/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare Data 2 | Run *gen_data.py* to generate random data and dump it to a file with pickle 3 | 4 | ## MXNet 5 | Run *simple_mlp.py* in the mxnet folder to train the model and visualize the results 6 | 7 | ## Caffe 8 | ### step 1 9 | Run *gen_hdf5.py* in the caffe folder to convert the data to HDF5 format 10 | ### step 2 11 | Run *simple_mlp_train.py* to train the model 12 | ### step 3 13 | Run *simple_mlp_test.py* to test the model and visualize the results -------------------------------------------------------------------------------- /chap7/caffe/gen_hdf5.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import h5py 4 | 5 | with open('../data.pkl', 'rb') as f: 6 | samples, labels = pickle.load(f) 7 | sample_size = len(labels) 8 | 9 | samples = np.array(samples).reshape((sample_size, 2)) 10 | labels = np.array(labels).reshape((sample_size, 1)) 11 | 12 | h5_filename = 'data.h5' 13 | with h5py.File(h5_filename, 'w') as h: 14 | h.create_dataset('data', data=samples) 15 | h.create_dataset('label', data=labels) 16 | 17 | with open('data_h5.txt', 'w') as f: 18 | f.write(h5_filename) 19 | -------------------------------------------------------------------------------- /chap7/caffe/simple_mlp_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pickle 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from mpl_toolkits.mplot3d import Axes3D 6 | sys.path.append('/path/to/caffe/python') 7 | import caffe 8 | 9 | net = caffe.Net('test.prototxt', 'simple_mlp_iter_2000.caffemodel', caffe.TEST) 10 | 11 | # load original data 12 | with open('../data.pkl', 'rb') as f: 13 | samples, labels = pickle.load(f) 14 | samples = np.array(samples) 15 | labels = np.array(labels) 16 | 17 | # Visualize result 18 | X = np.arange(0, 1.05, 0.05) 19 | Y = np.arange(0, 1.05, 0.05) 20 | X, Y = np.meshgrid(X, Y) 21 | 22 | # Plot the surface of probability 23 | grids = np.array([[X[i][j], Y[i][j]] for i in range(X.shape[0]) for j in range(X.shape[1])]) 24 | grid_probs = [] 25 | for grid in grids: 26 | net.blobs['data'].data[...] = grid.reshape((1, 2))[...] 27 | output = net.forward() 28 | grid_probs.append(output['prob'][0][1]) 29 | 30 | grid_probs = np.array(grid_probs).reshape(X.shape) 31 | 32 | fig = plt.figure('Sample Surface') 33 | ax = fig.gca(projection='3d') 34 | 35 | ax.plot_surface(X, Y, grid_probs, alpha=0.15, color='k', rstride=2, cstride=2, lw=0.5) 36 | 37 | # Plot the predicted probability of samples 38 | samples0 = samples[labels==0] 39 | samples0_probs = [] 40 | for sample in samples0: 41 | net.blobs['data'].data[...] = sample.reshape((1, 2))[...] 42 | output = net.forward() 43 | samples0_probs.append(output['prob'][0][1]) 44 | 45 | samples1 = samples[labels==1] 46 | samples1_probs = [] 47 | for sample in samples1: 48 | net.blobs['data'].data[...] = sample.reshape((1, 2))[...]
49 | output = net.forward() 50 | samples1_probs.append(output['prob'][0][1]) 51 | 52 | ax.scatter(samples0[:, 0], samples0[:, 1], samples0_probs, c='b', marker='^', s=50) 53 | ax.scatter(samples1[:, 0], samples1[:, 1], samples1_probs, c='r', marker='o', s=50) 54 | 55 | plt.show() 56 | -------------------------------------------------------------------------------- /chap7/caffe/simple_mlp_train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | sys.path.append('/path/to/caffe/python') 5 | import caffe 6 | 7 | solver = caffe.SGDSolver('solver.prototxt') 8 | solver.solve() 9 | 10 | net = solver.net 11 | net.blobs['data'].data[...] = np.array([[0.5, 0.5]]) 12 | output = net.forward() 13 | print(output) 14 | -------------------------------------------------------------------------------- /chap7/caffe/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "train.prototxt" 2 | base_lr: 0.15 3 | lr_policy: "fixed" 4 | display: 100 5 | max_iter: 2000 6 | momentum: 0.95 7 | snapshot_prefix: "simple_mlp" 8 | solver_mode: CPU -------------------------------------------------------------------------------- /chap7/caffe/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "SimpleMLP" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 2 6 | } 7 | layer { 8 | name: "fc1" 9 | type: "InnerProduct" 10 | bottom: "data" 11 | top: "fc1" 12 | inner_product_param { 13 | num_output: 2 14 | } 15 | } 16 | layer { 17 | name: "sigmoid1" 18 | type: "Sigmoid" 19 | bottom: "fc1" 20 | top: "sigmoid1" 21 | } 22 | layer { 23 | name: "fc2" 24 | type: "InnerProduct" 25 | bottom: "sigmoid1" 26 | top: "fc2" 27 | inner_product_param { 28 | num_output: 2 29 | } 30 | } 31 | layer { 32 | name: "softmax" 33 | type: "Softmax" 34 | bottom: "fc2" 35 | top: "prob" 36 | } 37 | -------------------------------------------------------------------------------- /chap7/caffe/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "SimpleMLP" 2 | layer { 3 | name: "data" 4 | type: "HDF5Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | hdf5_data_param { 11 | source: "data_h5.txt" 12 | batch_size: 41 13 | } 14 | } 15 | layer { 16 | name: "fc1" 17 | type: "InnerProduct" 18 | bottom: "data" 19 | top: "fc1" 20 | inner_product_param { 21 | num_output: 2 22 | weight_filler { 23 | type: "uniform" 24 | } 25 | } 26 | } 27 | layer { 28 | name: "sigmoid1" 29 | type: "Sigmoid" 30 | bottom: "fc1" 31 | top: "sigmoid1" 32 | } 33 | layer { 34 | name: "fc2" 35 | type: "InnerProduct" 36 | bottom: "sigmoid1" 37 | top: "fc2" 38 | inner_product_param { 39 | num_output: 2 40 | weight_filler { 41 | type: "uniform" 42 | } 43 | } 44 | } 45 | layer { 46 | name: "loss" 47 | type: "SoftmaxWithLoss" 48 | bottom: "fc2" 49 | bottom: "label" 50 | top: "loss" 51 | } 52 | -------------------------------------------------------------------------------- /chap7/gen_data.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def cos_curve(x): 7 | return 0.25*np.sin(2*x*np.pi+0.5*np.pi) + 0.5 8 | 9 | np.random.seed(123) 10 | samples = [] 11 | labels = [] 12 | 13 | sample_density = 50 14 | for i in range(sample_density): 15 | x1, x2 = np.random.random(2) 16 | bound = cos_curve(x1) 17 | if bound - 0.1 < x2 <= bound + 0.1: 18 | continue 19 | else: 20 | samples.append((x1, x2)) 21 | if x2 > bound: 22 | labels.append(1) 23 | else: 24 | labels.append(0) 25 | 26 | with open('data.pkl', 'wb') as f: 27 | pickle.dump((samples, labels), f) 28 | 29 | for i, sample in enumerate(samples): 30 | plt.plot(sample[0], sample[1], 31 | 'o' if labels[i] else '^', 32 | mec='r' if labels[i] else 'b', 33 | mfc='none', 34 | markersize=10) 35 | 36 | x1 = np.linspace(0, 1) 37 | plt.plot(x1, cos_curve(x1), 'k--') 38 | plt.show() 39 | --------------------------------------------------------------------------------
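Added sketch (not part of the repo): a minimal sanity check for the data.pkl written by gen_data.py above, run from the same directory; it only assumes the (samples, labels) layout used throughout chap7.
import pickle
with open('data.pkl', 'rb') as f:
    samples, labels = pickle.load(f)
# labels are 0/1, so summing them counts the positive class
print('{} samples, {} labeled 1, {} labeled 0'.format(
    len(samples), sum(labels), len(labels) - sum(labels)))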
/chap7/mxnet/simple_mlp.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import logging 3 | import numpy as np 4 | import mxnet as mx 5 | import matplotlib.pyplot as plt 6 | from mpl_toolkits.mplot3d import Axes3D 7 | 8 | # Define the network 9 | data = mx.sym.Variable('data') 10 | fc1 = mx.sym.FullyConnected(data=data, name='fc1', num_hidden=2) 11 | sigmoid1 = mx.sym.Activation(data=fc1, name='sigmoid1', act_type='sigmoid') 12 | fc2 = mx.sym.FullyConnected(data=sigmoid1, name='fc2', num_hidden=2) 13 | mlp = mx.sym.SoftmaxOutput(data=fc2, name='softmax') 14 | 15 | shape = {'data': (2,)} 16 | mlp_dot = mx.viz.plot_network(symbol=mlp, shape=shape) 17 | mlp_dot.render('simple_mlp.gv', view=True) 18 | 19 | # Load data & train the model 20 | with open('../data.pkl', 'rb') as f: 21 | samples, labels = pickle.load(f) 22 | 23 | logging.getLogger().setLevel(logging.DEBUG) 24 | 25 | batch_size = len(labels) 26 | samples = np.array(samples) 27 | labels = np.array(labels) 28 | 29 | train_iter = mx.io.NDArrayIter(samples, labels, batch_size) 30 | 31 | model = mx.model.FeedForward.create( 32 | symbol=mlp, 33 | X=train_iter, 34 | num_epoch=1000, 35 | learning_rate=0.1, 36 | momentum=0.99) 37 | 38 | ''' 39 | # Alternative interface to train the model 40 | model = mx.model.FeedForward( 41 | symbol=mlp, 42 | num_epoch=1000, 43 | learning_rate=0.1, 44 | momentum=0.99) 45 | model.fit(X=train_iter) 46 | ''' 47 | 48 | print(model.predict(mx.nd.array([[0.5, 0.5]]))) 49 | 50 | # Visualize result 51 | X = np.arange(0, 1.05, 0.05) 52 | Y = np.arange(0, 1.05, 0.05) 53 | X, Y = np.meshgrid(X, Y) 54 | 55 | grids = mx.nd.array([[X[i][j], Y[i][j]] for i in range(X.shape[0]) for j in range(X.shape[1])]) 56 | grid_probs = model.predict(grids)[:, 1].reshape(X.shape) 57 | 58 | fig = plt.figure('Sample Surface') 59 | ax = fig.gca(projection='3d') 60 | 61 | ax.plot_surface(X, Y, grid_probs, alpha=0.15, color='k', rstride=2, cstride=2, lw=0.5) 62 | samples0 = samples[labels==0] 63 | samples0_probs = model.predict(samples0)[:, 1] 64 | samples1 = samples[labels==1] 65 | samples1_probs = model.predict(samples1)[:, 1] 66 | 67 | ax.scatter(samples0[:, 0], samples0[:, 1], samples0_probs, c='b', marker='^', s=50) 68 | ax.scatter(samples1[:, 0], samples1[:, 1], samples1_probs, c='r', marker='o', s=50) 69 | 70 | plt.show() 71 | -------------------------------------------------------------------------------- /chap8/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare Data 2 | ### step 1 3 | > ./download_mnist.sh 4 | 5 | Download mnist.pkl.gz, then run *convert_mnist.py* to convert the pickled data into images 6 | 7 | If the original link is unavailable, the file can also be downloaded from this cloud drive: 8 | http://pan.baidu.com/s/1bHmm7s 9 | 10 | ### step 2 11 | > python gen_caffe_imglist.py mnist/train train.txt 12 | 13 | > python gen_caffe_imglist.py mnist/val val.txt 14 | 15 | > python gen_caffe_imglist.py mnist/test test.txt 16 | 17 | This produces the image lists; then run: 18 | > /path/to/caffe/build/tools/convert_imageset ./ train.txt train_lmdb --gray --shuffle 19 | 20 | > /path/to/caffe/build/tools/convert_imageset ./ val.txt val_lmdb --gray --shuffle 21 | 22 | > /path/to/caffe/build/tools/convert_imageset ./ test.txt test_lmdb --gray --shuffle 23 | 24 | 25 | to generate the LMDBs 26 | 27 | ### step 3 28 | > python gen_mxnet_imglist.py mnist/train train.lst 29 | 30 | > python gen_mxnet_imglist.py mnist/val val.lst 31 | 32 | > python gen_mxnet_imglist.py mnist/test test.lst 33 | 34 | to produce the image lists, then run 35 | 36 | > /path/to/mxnet/bin/im2rec train.lst ./ train.rec color=0 37 | 38 | > /path/to/mxnet/bin/im2rec val.lst ./ val.rec color=0 39 | 40 | > /path/to/mxnet/bin/im2rec test.lst ./ test.rec color=0 41 | 42 | to generate the ImageRecordIO files 43 | 44 | 45 | ## MXNet 46 | 47 | Run *train_lenet5.py* to train the model 48 | 49 | Run *score_model.py* to evaluate the model on the test set 50 | 51 | Run *benchmark_model.py* to benchmark the model's forward speed 52 | 53 | Run *recognize_digit.py* with a path to images as the argument to recognize handwritten digits 54 | 55 | ## Caffe 56 | *lenet_train_val.prototxt* & *lenet_train_val_aug.prototxt* are the network and data definition files for training on the original data and the augmented data, respectively 57 | 58 | *lenet_solver.prototxt* & *lenet_solver_aug.prototxt* are the solver files for the original data and the augmented data, respectively 59 | 60 | *lenet_test.prototxt* is the network and data-source definition file for testing the model on the test data 61 | 62 | *lenet.prototxt* is the network definition file for deployment 63 | 64 | Run *recognize_digit.py* with a list file of test images to demo handwritten digit recognition --------------------------------------------------------------------------------
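Added note (not in the repo): every transform_param in the prototxt files below, and the MEAN/SCALE constants in the recognize_digit.py scripts, apply (pixel - 128) * 0.00390625; since 0.00390625 = 1/256, inputs are mapped to roughly [-0.5, 0.5). A one-line check:
MEAN, SCALE = 128, 0.00390625
assert SCALE == 1.0 / 256
print('{} {}'.format((0 - MEAN) * SCALE, (255 - MEAN) * SCALE))  # -0.5 0.49609375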
/chap8/caffe/lenet.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { 7 | shape: { 8 | dim: 64 9 | dim: 1 10 | dim: 28 11 | dim: 28 12 | } 13 | } 14 | } 15 | layer { 16 | name: "conv1" 17 | type: "Convolution" 18 | bottom: "data" 19 | top: "conv1" 20 | param { 21 | lr_mult: 1 22 | } 23 | param { 24 | lr_mult: 2 25 | } 26 | convolution_param { 27 | num_output: 20 28 | kernel_size: 5 29 | stride: 1 30 | weight_filler { 31 | type: "xavier" 32 | } 33 | bias_filler { 34 | type: "constant" 35 | } 36 | } 37 | } 38 | layer { 39 | name: "pool1" 40 | type: "Pooling" 41 | bottom: "conv1" 42 | top: "pool1" 43 | pooling_param { 44 | pool: MAX 45 | kernel_size: 2 46 | stride: 2 47 | } 48 | } 49 | layer { 50 | name: "conv2" 51 | type: "Convolution" 52 | bottom: "pool1" 53 | top: "conv2" 54 | param { 55 | lr_mult: 1 56 | } 57 | param { 58 | lr_mult: 2 59 | } 60 | convolution_param { 61 | num_output: 50 62 | kernel_size: 5 63 | stride: 1 64 | weight_filler { 65 | type: "xavier" 66 | } 67 | bias_filler { 68 | type: "constant" 69 | } 70 | } 71 | } 72 | layer { 73 | name: "pool2" 74 | type: "Pooling" 75 | bottom: "conv2" 76 | top: "pool2" 77 | pooling_param { 78 | pool: MAX 79 | kernel_size: 2 80 | stride: 2 81 | } 82 | } 83 | layer { 84 | name: "ip1" 85 | type: "InnerProduct" 86 | bottom: "pool2" 87 | top: "ip1" 88 | param { 89 | lr_mult: 1 90 | } 91 | param { 92 | lr_mult: 2 93 | } 94 | inner_product_param { 95 | num_output: 500 96 | weight_filler { 97 | type: "xavier" 98 | } 99 | bias_filler { 100 | type: "constant" 101 | } 102 | } 103 | } 104 | layer { 105 | name: "relu1" 106 | type: "ReLU" 107 | bottom: "ip1" 108 | top: "ip1" 109 | } 110 | layer { 111 | name: "ip2" 112 | type: "InnerProduct" 113 | bottom: "ip1" 114 | top: "ip2" 115 | param { 116 | lr_mult: 1 117 | } 118 | param { 119 | lr_mult: 2 120 | } 121 | inner_product_param { 122 | num_output: 10 123 | weight_filler { 124 | type: "xavier" 125 | } 126 | bias_filler { 127 | type: "constant" 128 | } 129 | } 130 | } 131 | layer { 132 | name:
"prob" 133 | type: "Softmax" 134 | bottom: "ip2" 135 | top: "prob" 136 | } 137 | 138 | -------------------------------------------------------------------------------- /chap8/caffe/lenet_solver.prototxt: -------------------------------------------------------------------------------- 1 | # The train/validate net protocol buffer definition 2 | net: "lenet_train_val.prototxt" 3 | # test_iter specifies how many forward passes the test should carry out. 4 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 5 | # covering the full 10,000 testing images. 6 | test_iter: 100 7 | # Carry out testing every 500 training iterations. 8 | test_interval: 500 9 | # The base learning rate, momentum and the weight decay of the network. 10 | base_lr: 0.01 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | # The learning rate policy 14 | lr_policy: "inv" 15 | gamma: 0.0001 16 | power: 0.75 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 36000 21 | # snapshot intermediate results 22 | snapshot: 5000 23 | snapshot_prefix: "mnist_lenet" 24 | # solver mode: CPU or GPU 25 | solver_mode: GPU 26 | -------------------------------------------------------------------------------- /chap8/caffe/lenet_solver_aug.prototxt: -------------------------------------------------------------------------------- 1 | # The train/validate net protocol buffer definition 2 | net: "lenet_train_val_aug.prototxt" 3 | # test_iter specifies how many forward passes the test should carry out. 4 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 5 | # covering the full 10,000 testing images. 6 | test_iter: 100 7 | # Carry out testing every 500 training iterations. 8 | test_interval: 500 9 | # The base learning rate, momentum and the weight decay of the network. 
10 | base_lr: 0.01 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | # The learning rate policy 14 | lr_policy: "inv" 15 | gamma: 0.0001 16 | power: 0.75 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 36000 21 | #max_iter: 120000 22 | # snapshot intermediate results 23 | snapshot: 5000 24 | snapshot_prefix: "mnist_aug_lenet" 25 | # solver mode: CPU or GPU 26 | solver_mode: GPU 27 | -------------------------------------------------------------------------------- /chap8/caffe/lenet_test.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet Test" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TEST 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | data_param { 15 | source: "../data/test_lmdb" 16 | batch_size: 100 17 | backend: LMDB 18 | } 19 | } 20 | layer { 21 | name: "conv1" 22 | type: "Convolution" 23 | bottom: "data" 24 | top: "conv1" 25 | param { 26 | lr_mult: 1 27 | } 28 | param { 29 | lr_mult: 2 30 | } 31 | convolution_param { 32 | num_output: 20 33 | kernel_size: 5 34 | stride: 1 35 | weight_filler { 36 | type: "xavier" 37 | } 38 | bias_filler { 39 | type: "constant" 40 | } 41 | } 42 | } 43 | layer { 44 | name: "pool1" 45 | type: "Pooling" 46 | bottom: "conv1" 47 | top: "pool1" 48 | pooling_param { 49 | pool: MAX 50 | kernel_size: 2 51 | stride: 2 52 | } 53 | } 54 | layer { 55 | name: "conv2" 56 | type: "Convolution" 57 | bottom: "pool1" 58 | top: "conv2" 59 | param { 60 | lr_mult: 1 61 | } 62 | param { 63 | lr_mult: 2 64 | } 65 | convolution_param { 66 | num_output: 50 67 | kernel_size: 5 68 | stride: 1 69 | weight_filler { 70 | type: "xavier" 71 | } 72 | bias_filler { 73 | type: "constant" 74 | } 75 | } 76 | } 77 | layer { 78 | name: "pool2" 79 | type: "Pooling" 80 | bottom: "conv2" 81 | top: "pool2" 82 | pooling_param { 83 | pool: MAX 84 | kernel_size: 2 85 | stride: 2 86 | } 87 | } 88 | layer { 89 | name: "ip1" 90 | type: "InnerProduct" 91 | bottom: "pool2" 92 | top: "ip1" 93 | param { 94 | lr_mult: 1 95 | } 96 | param { 97 | lr_mult: 2 98 | } 99 | inner_product_param { 100 | num_output: 500 101 | weight_filler { 102 | type: "xavier" 103 | } 104 | bias_filler { 105 | type: "constant" 106 | } 107 | } 108 | } 109 | layer { 110 | name: "relu1" 111 | type: "ReLU" 112 | bottom: "ip1" 113 | top: "ip1" 114 | } 115 | layer { 116 | name: "ip2" 117 | type: "InnerProduct" 118 | bottom: "ip1" 119 | top: "ip2" 120 | param { 121 | lr_mult: 1 122 | } 123 | param { 124 | lr_mult: 2 125 | } 126 | inner_product_param { 127 | num_output: 10 128 | weight_filler { 129 | type: "xavier" 130 | } 131 | bias_filler { 132 | type: "constant" 133 | } 134 | } 135 | } 136 | layer { 137 | name: "accuracy" 138 | type: "Accuracy" 139 | bottom: "ip2" 140 | bottom: "label" 141 | top: "accuracy" 142 | include { 143 | phase: TEST 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /chap8/caffe/lenet_train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | data_param { 15 | source: "../data/train_lmdb" 16 | batch_size: 50 17 | backend: LMDB 18 | } 19 | } 20 | layer { 21 | name: "mnist" 22 | type: "Data" 23 | 
top: "data" 24 | top: "label" 25 | include { 26 | phase: TEST 27 | } 28 | transform_param { 29 | mean_value: 128 30 | scale: 0.00390625 31 | } 32 | data_param { 33 | source: "../data/val_lmdb" 34 | batch_size: 100 35 | backend: LMDB 36 | } 37 | } 38 | layer { 39 | name: "conv1" 40 | type: "Convolution" 41 | bottom: "data" 42 | top: "conv1" 43 | param { 44 | lr_mult: 1 45 | } 46 | param { 47 | lr_mult: 2 48 | } 49 | convolution_param { 50 | num_output: 20 51 | kernel_size: 5 52 | stride: 1 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 2 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "conv2" 74 | type: "Convolution" 75 | bottom: "pool1" 76 | top: "conv2" 77 | param { 78 | lr_mult: 1 79 | } 80 | param { 81 | lr_mult: 2 82 | } 83 | convolution_param { 84 | num_output: 50 85 | kernel_size: 5 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "conv2" 99 | top: "pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "ip1" 108 | type: "InnerProduct" 109 | bottom: "pool2" 110 | top: "ip1" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | inner_product_param { 118 | num_output: 500 119 | weight_filler { 120 | type: "xavier" 121 | } 122 | bias_filler { 123 | type: "constant" 124 | } 125 | } 126 | } 127 | layer { 128 | name: "relu1" 129 | type: "ReLU" 130 | bottom: "ip1" 131 | top: "ip1" 132 | } 133 | layer { 134 | name: "ip2" 135 | type: "InnerProduct" 136 | bottom: "ip1" 137 | top: "ip2" 138 | param { 139 | lr_mult: 1 140 | } 141 | param { 142 | lr_mult: 2 143 | } 144 | inner_product_param { 145 | num_output: 10 146 | weight_filler { 147 | type: "xavier" 148 | } 149 | bias_filler { 150 | type: "constant" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "accuracy" 156 | type: "Accuracy" 157 | bottom: "ip2" 158 | bottom: "label" 159 | top: "accuracy" 160 | include { 161 | phase: TEST 162 | } 163 | } 164 | layer { 165 | name: "loss" 166 | type: "SoftmaxWithLoss" 167 | bottom: "ip2" 168 | bottom: "label" 169 | top: "loss" 170 | } 171 | -------------------------------------------------------------------------------- /chap8/caffe/lenet_train_val_aug.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | data_param { 15 | source: "../data/train_aug_lmdb" 16 | batch_size: 50 17 | backend: LMDB 18 | } 19 | } 20 | layer { 21 | name: "mnist" 22 | type: "Data" 23 | top: "data" 24 | top: "label" 25 | include { 26 | phase: TEST 27 | } 28 | transform_param { 29 | mean_value: 128 30 | scale: 0.00390625 31 | } 32 | data_param { 33 | source: "../data/val_lmdb" 34 | batch_size: 100 35 | backend: LMDB 36 | } 37 | } 38 | layer { 39 | name: "conv1" 40 | type: "Convolution" 41 | bottom: "data" 42 | top: "conv1" 43 | param { 44 | lr_mult: 1 45 | } 46 | param { 47 | lr_mult: 2 48 | } 49 | convolution_param { 50 | num_output: 20 51 | kernel_size: 5 52 | stride: 1 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 
57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 2 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "conv2" 74 | type: "Convolution" 75 | bottom: "pool1" 76 | top: "conv2" 77 | param { 78 | lr_mult: 1 79 | } 80 | param { 81 | lr_mult: 2 82 | } 83 | convolution_param { 84 | num_output: 50 85 | kernel_size: 5 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "conv2" 99 | top: "pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "ip1" 108 | type: "InnerProduct" 109 | bottom: "pool2" 110 | top: "ip1" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | inner_product_param { 118 | num_output: 500 119 | weight_filler { 120 | type: "xavier" 121 | } 122 | bias_filler { 123 | type: "constant" 124 | } 125 | } 126 | } 127 | layer { 128 | name: "relu1" 129 | type: "ReLU" 130 | bottom: "ip1" 131 | top: "ip1" 132 | } 133 | layer { 134 | name: "ip2" 135 | type: "InnerProduct" 136 | bottom: "ip1" 137 | top: "ip2" 138 | param { 139 | lr_mult: 1 140 | } 141 | param { 142 | lr_mult: 2 143 | } 144 | inner_product_param { 145 | num_output: 10 146 | weight_filler { 147 | type: "xavier" 148 | } 149 | bias_filler { 150 | type: "constant" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "accuracy" 156 | type: "Accuracy" 157 | bottom: "ip2" 158 | bottom: "label" 159 | top: "accuracy" 160 | include { 161 | phase: TEST 162 | } 163 | } 164 | layer { 165 | name: "loss" 166 | type: "SoftmaxWithLoss" 167 | bottom: "ip2" 168 | bottom: "label" 169 | top: "loss" 170 | } 171 | -------------------------------------------------------------------------------- /chap8/caffe/recognize_digit.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/path/to/caffe/python') 3 | import numpy as np 4 | import cv2 5 | import caffe 6 | 7 | MEAN = 128 8 | SCALE = 0.00390625 9 | 10 | imglist = sys.argv[1] 11 | 12 | caffe.set_mode_gpu() 13 | caffe.set_device(0) 14 | net = caffe.Net('lenet.prototxt', 'mnist_lenet_iter_36000.caffemodel', caffe.TEST) 15 | net.blobs['data'].reshape(1, 1, 28, 28) 16 | 17 | with open(imglist, 'r') as f: 18 | line = f.readline() 19 | while line: 20 | imgpath, label = line.split() 21 | line = f.readline() 22 | image = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE).astype(np.float) - MEAN 23 | image *= SCALE 24 | net.blobs['data'].data[...] 
= image 25 | output = net.forward() 26 | pred_label = np.argmax(output['prob'][0]) 27 | print('Predicted digit for {} is {}'.format(imgpath, pred_label)) 28 | -------------------------------------------------------------------------------- /chap8/data/convert_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle, gzip 3 | from matplotlib import pyplot 4 | 5 | # Load the dataset 6 | print('Loading data from mnist.pkl.gz ...') 7 | with gzip.open('mnist.pkl.gz', 'rb') as f: 8 | train_set, valid_set, test_set = pickle.load(f) 9 | 10 | imgs_dir = 'mnist' 11 | os.system('mkdir -p {}'.format(imgs_dir)) 12 | datasets = {'train': train_set, 'val': valid_set, 'test': test_set} 13 | for dataname, dataset in datasets.items(): 14 | print('Converting {} dataset ...'.format(dataname)) 15 | data_dir = os.sep.join([imgs_dir, dataname]) 16 | os.system('mkdir -p {}'.format(data_dir)) 17 | for i, (img, label) in enumerate(zip(*dataset)): 18 | filename = '{:0>6d}_{}.jpg'.format(i, label) 19 | filepath = os.sep.join([data_dir, filename]) 20 | img = img.reshape((28, 28)) 21 | pyplot.imsave(filepath, img, cmap='gray') 22 | if (i+1) % 10000 == 0: 23 | print('{} images converted!'.format(i+1)) 24 | 25 | -------------------------------------------------------------------------------- /chap8/data/download_mnist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # wget http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz 3 | wget http://deeplearning.net/data/mnist/mnist.pkl.gz 4 | -------------------------------------------------------------------------------- /chap8/data/gen_caffe_imglist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | input_path = sys.argv[1].rstrip(os.sep) 5 | output_path = sys.argv[2] 6 | 7 | filenames = os.listdir(input_path) 8 | 9 | with open(output_path, 'w') as f: 10 | for filename in filenames: 11 | filepath = os.sep.join([input_path, filename]) 12 | label = filename[:filename.rfind('.')].split('_')[1] 13 | line = '{} {}\n'.format(filepath, label) 14 | f.write(line) 15 | 16 | -------------------------------------------------------------------------------- /chap8/data/gen_mxnet_imglist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | input_path = sys.argv[1].rstrip(os.sep) 5 | output_path = sys.argv[2] 6 | 7 | filenames = os.listdir(input_path) 8 | 9 | with open(output_path, 'w') as f: 10 | for i, filename in enumerate(filenames): 11 | filepath = os.sep.join([input_path, filename]) 12 | label = filename[:filename.rfind('.')].split('_')[1] 13 | line = '{}\t{}\t{}\n'.format(i, label, filepath) 14 | f.write(line) 15 | 16 | -------------------------------------------------------------------------------- /chap8/mxnet/benchmark_model.py: -------------------------------------------------------------------------------- 1 | import time 2 | import mxnet as mx 3 | 4 | benchmark_dataiter = mx.io.ImageRecordIter( 5 | path_imgrec="../data/test.rec", 6 | data_shape=(1, 28, 28), 7 | batch_size=64, 8 | mean_r=128, 9 | scale=0.00390625, 10 | ) 11 | 12 | mod = mx.mod.Module.load('mnist_lenet', 35, context=mx.gpu(2)) 13 | mod.bind( 14 | data_shapes=benchmark_dataiter.provide_data, 15 | label_shapes=benchmark_dataiter.provide_label, 16 | for_training=False) 17 | 18 | start = time.time() 19 | for i, batch in enumerate(benchmark_dataiter): 20 
| mod.forward(batch) 21 | time_elapsed = time.time() - start 22 | msg = '{} batches iterated!\nAverage forward time per batch: {:.6f} ms' 23 | print(msg.format(i+1, 1000*time_elapsed/float(i+1))) 24 | -------------------------------------------------------------------------------- /chap8/mxnet/recognize_digit.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import cv2 4 | from collections import namedtuple 5 | Batch = namedtuple('Batch', ['data']) 6 | import numpy as np 7 | import mxnet as mx 8 | 9 | input_path = sys.argv[1].rstrip(os.sep) 10 | 11 | mod = mx.mod.Module.load('mnist_lenet', 35, context=mx.gpu(2)) 12 | mod.bind( 13 | data_shapes=[('data', (1, 1, 28, 28))], 14 | for_training=False) 15 | 16 | filenames = os.listdir(input_path) 17 | for filename in filenames: 18 | filepath = os.sep.join([input_path, filename]) 19 | img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE) 20 | img = (img.astype(np.float)-128) * 0.00390625 21 | img = img.reshape((1, 1)+img.shape) 22 | mod.forward(Batch([mx.nd.array(img)])) 23 | prob = mod.get_outputs()[0].asnumpy() 24 | prob = np.squeeze(prob) 25 | pred_label = np.argmax(prob) 26 | print('Predicted digit for {} is {}'.format(filepath, pred_label)) 27 | -------------------------------------------------------------------------------- /chap8/mxnet/score_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | test_dataiter = mx.io.ImageRecordIter( 4 | path_imgrec="../data/test.rec", 5 | data_shape=(1, 28, 28), 6 | batch_size=100, 7 | mean_r=128, 8 | scale=0.00390625, 9 | ) 10 | 11 | mod = mx.mod.Module.load('mnist_lenet', 35, context=mx.gpu(2)) 12 | mod.bind( 13 | data_shapes=test_dataiter.provide_data, 14 | label_shapes=test_dataiter.provide_label, 15 | for_training=False) 16 | 17 | ''' 18 | # in case we need to continue to train from epoch 35 19 | mod.fit(..., 20 | arg_params=arg_params, 21 | aux_params=aux_params, 22 | begin_epoch=35) 23 | ''' 24 | 25 | metric = mx.metric.create('acc') 26 | mod.score(test_dataiter, metric) 27 | 28 | for name, val in metric.get_name_value(): 29 | print('{}={:.2f}%'.format(name, val*100)) 30 | -------------------------------------------------------------------------------- /chap8/mxnet/train_lenet5.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import logging 3 | 4 | # data & preprocessing 5 | data = mx.symbol.Variable('data') 6 | 7 | # 1st conv 8 | conv1 = mx.symbol.Convolution(data=data, kernel=(5, 5), num_filter=20) 9 | pool1 = mx.symbol.Pooling(data=conv1, pool_type="max", 10 | kernel=(2, 2), stride=(2, 2)) 11 | # 2nd conv 12 | conv2 = mx.symbol.Convolution(data=pool1, kernel=(5, 5), num_filter=50) 13 | pool2 = mx.symbol.Pooling(data=conv2, pool_type="max", 14 | kernel=(2, 2), stride=(2, 2)) 15 | # 1st fc & relu 16 | flatten = mx.symbol.Flatten(data=pool2) 17 | fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=500) 18 | relu1 = mx.symbol.Activation(data=fc1, act_type="relu") 19 | 20 | # 2nd fc 21 | fc2 = mx.symbol.FullyConnected(data=relu1, num_hidden=10) 22 | # loss 23 | lenet5 = mx.symbol.SoftmaxOutput(data=fc2, name='softmax') 24 | 25 | train_dataiter = mx.io.ImageRecordIter( 26 | path_imgrec="../data/train.rec", 27 | data_shape=(1, 28, 28), 28 | batch_size=50, 29 | mean_r=128, 30 | scale=0.00390625, 31 | rand_crop=True, 32 | min_crop_size=26, 33 | max_crop_size=28, 34 | max_rotate_angle=15, 35 | fill_value=0 36 | ) 37 | 38 | val_dataiter = mx.io.ImageRecordIter( 39 | path_imgrec="../data/val.rec", 40 | data_shape=(1, 28, 28), 41 | batch_size=100, 42 | mean_r=128, 43 | scale=0.00390625, 44 | ) 45 | 46 | logging.getLogger().setLevel(logging.DEBUG) 47 | fh = logging.FileHandler('train_mnist_lenet.log') 48 | logging.getLogger().addHandler(fh) 49 | 50 | lr_scheduler = mx.lr_scheduler.FactorScheduler(1000, factor=0.95) 51 | optimizer_params = { 52 | 'learning_rate': 0.01, 53 | 'momentum': 0.9, 54 | 'wd': 0.0005, 55 | 'lr_scheduler': lr_scheduler 56 | } 57 | checkpoint = mx.callback.do_checkpoint('mnist_lenet', period=5) 58 | 59 | mod = mx.mod.Module(lenet5, context=mx.gpu(2)) 60 | mod.fit(train_dataiter, 61 | eval_data=val_dataiter, 62 | optimizer_params=optimizer_params, 63 | num_epoch=36, 64 | epoch_end_callback=checkpoint) 65 | --------------------------------------------------------------------------------
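Added note (not in the repo): the FactorScheduler(1000, factor=0.95) in train_lenet5.py above multiplies the learning rate by 0.95 every 1000 updates, so after k updates it is 0.01 * 0.95 ** (k // 1000). A quick sketch of the decay:
base_lr = 0.01
for k in (0, 1000, 10000, 35000):
    # prints 0.01, 0.0095, ~0.006, ~0.0017
    print('{}: {}'.format(k, base_lr * 0.95 ** (k // 1000)))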
/chap9/README.md: -------------------------------------------------------------------------------- 1 | ## Step 1 2 | 3 | > python make_noises.py 4 | 5 | ## Step 2 6 | 7 | > python gen_label.py 8 | 9 | ## Step 3 10 | 11 | > python gen_hdf5.py train.txt 12 | > python gen_hdf5.py val.txt 13 | 14 | ## Step 4 15 | 16 | > /path/to/caffe/build/tools/caffe train -solver solver.prototxt 17 | 18 | ## Step 5 19 | > python predict.py test.txt 20 | 21 | ## Visualize Conv1 Kernels 22 | > python visualize_conv1_kernels.py 23 | -------------------------------------------------------------------------------- /chap9/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "RegressionExample" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { 7 | shape: { 8 | dim: 100 9 | dim: 1 10 | dim: 100 11 | dim: 100 12 | } 13 | } 14 | } 15 | layer { 16 | name: "conv1" 17 | type: "Convolution" 18 | bottom: "data" 19 | top: "conv1" 20 | param { 21 | lr_mult: 1 22 | decay_mult: 1 23 | } 24 | param { 25 | lr_mult: 1 26 | decay_mult: 0 27 | } 28 | convolution_param { 29 | num_output: 96 30 | kernel_size: 5 31 | stride: 2 32 | weight_filler { 33 | type: "gaussian" 34 | std: 0.01 35 | } 36 | bias_filler { 37 | type: "constant" 38 | value: 0 39 | } 40 | } 41 | } 42 | layer { 43 | name: "relu1" 44 | type: "ReLU" 45 | bottom: "conv1" 46 | top: "conv1" 47 | } 48 | layer { 49 | name: "pool1" 50 | type: "Pooling" 51 | bottom: "conv1" 52 | top: "pool1" 53 | pooling_param { 54 | pool: MAX 55 | kernel_size: 3 56 | stride: 2 57 | } 58 | } 59 | layer { 60 | name: "conv2" 61 | type: "Convolution" 62 | bottom: "pool1" 63 | top: "conv2" 64 | param { 65 | lr_mult: 1 66 | decay_mult: 1 67 | } 68 | param { 69 | lr_mult: 1 70 | decay_mult: 0 71 | } 72 | convolution_param { 73 | num_output: 96 74 | pad: 2 75 | kernel_size: 3 76 | weight_filler { 77 | type: "gaussian" 78 | std: 0.01 79 | } 80 | bias_filler { 81 | type: "constant" 82 | value: 0 83 | } 84 | } 85 | } 86 | layer { 87 | name: "relu2" 88 | type: "ReLU" 89 | bottom: "conv2" 90 | top: "conv2" 91 | } 92 | layer { 93 | name: "pool2" 94 | type: "Pooling" 95 | bottom: "conv2" 96 | top: "pool2" 97 | pooling_param { 98 | pool: MAX 99 | kernel_size: 3 100 | stride: 2 101 | } 102 | } 103 | layer { 104 | name: "conv3" 105 | type: "Convolution" 106 | bottom: "pool2" 107 | top: "conv3" 108 | param { 109 | lr_mult: 1 110 | decay_mult: 1 111 | } 112 | param { 113 | lr_mult: 1 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | weight_filler { 121 | type: "gaussian"
122 | std: 0.01 123 | } 124 | bias_filler { 125 | type: "constant" 126 | value: 0 127 | } 128 | } 129 | } 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | layer { 137 | name: "pool3" 138 | type: "Pooling" 139 | bottom: "conv3" 140 | top: "pool3" 141 | pooling_param { 142 | pool: MAX 143 | kernel_size: 3 144 | stride: 2 145 | } 146 | } 147 | layer { 148 | name: "fc4" 149 | type: "InnerProduct" 150 | bottom: "pool3" 151 | top: "fc4" 152 | param { 153 | lr_mult: 1 154 | decay_mult: 1 155 | } 156 | param { 157 | lr_mult: 1 158 | decay_mult: 0 159 | } 160 | inner_product_param { 161 | num_output: 192 162 | weight_filler { 163 | type: "gaussian" 164 | std: 0.005 165 | } 166 | bias_filler { 167 | type: "constant" 168 | value: 0 169 | } 170 | } 171 | } 172 | layer { 173 | name: "relu4" 174 | type: "ReLU" 175 | bottom: "fc4" 176 | top: "fc4" 177 | } 178 | layer { 179 | name: "drop4" 180 | type: "Dropout" 181 | bottom: "fc4" 182 | top: "fc4" 183 | dropout_param { 184 | dropout_ratio: 0.35 185 | } 186 | } 187 | layer { 188 | name: "fc5" 189 | type: "InnerProduct" 190 | bottom: "fc4" 191 | top: "fc5" 192 | param { 193 | lr_mult: 1 194 | decay_mult: 1 195 | } 196 | param { 197 | lr_mult: 1 198 | decay_mult: 0 199 | } 200 | inner_product_param { 201 | num_output: 2 202 | weight_filler { 203 | type: "gaussian" 204 | std: 0.005 205 | } 206 | bias_filler { 207 | type: "constant" 208 | value: 0 209 | } 210 | } 211 | } 212 | layer { 213 | name: "sigmoid5" 214 | type: "Sigmoid" 215 | bottom: "fc5" 216 | top: "pred" 217 | } 218 | -------------------------------------------------------------------------------- /chap9/gen_hdf5.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import h5py 5 | 6 | IMAGE_SIZE = (100, 100) 7 | MEAN_VALUE = 128 8 | 9 | filename = sys.argv[1] 10 | setname, ext = filename.split('.') 11 | 12 | with open(filename, 'r') as f: 13 | lines = f.readlines() 14 | 15 | np.random.shuffle(lines) 16 | 17 | sample_size = len(lines) 18 | imgs = np.zeros((sample_size, 1,) + IMAGE_SIZE, dtype=np.float32) 19 | freqs = np.zeros((sample_size, 2), dtype=np.float32) 20 | 21 | h5_filename = '{}.h5'.format(setname) 22 | with h5py.File(h5_filename, 'w') as h: 23 | for i, line in enumerate(lines): 24 | image_name, fx, fy = line[:-1].split() 25 | img = plt.imread(image_name)[:, :, 0].astype(np.float32) 26 | img = img.reshape((1, )+img.shape) 27 | img -= MEAN_VALUE 28 | imgs[i] = img 29 | freqs[i] = [float(fx), float(fy)] 30 | if (i+1) % 1000 == 0: 31 | print('Processed {} images!'.format(i+1)) 32 | h.create_dataset('data', data=imgs) 33 | h.create_dataset('freq', data=freqs) 34 | 35 | with open('{}_h5.txt'.format(setname), 'w') as f: 36 | f.write(h5_filename) 37 | -------------------------------------------------------------------------------- /chap9/gen_label.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | filename2score = lambda x: x[:x.rfind('.')].split('_')[-2:] 4 | 5 | filenames = os.listdir('samples') 6 | 7 | with open('train.txt', 'w') as f_train_txt: 8 | for filename in filenames[:50000]: 9 | fx, fy = filename2score(filename) 10 | line = 'samples/{} {} {}\n'.format(filename, fx, fy) 11 | f_train_txt.write(line) 12 | 13 | with open('val.txt', 'w') as f_val_txt: 14 | for filename in filenames[50000:60000]: 15 | fx, fy = filename2score(filename) 16 | line = 'samples/{} {} 
{}\n'.format(filename, fx, fy) 17 | f_val_txt.write(line) 18 | 19 | with open('test.txt', 'w') as f_test_txt: 20 | for filename in filenames[60000:]: 21 | line = 'samples/{}\n'.format(filename) 22 | f_test_txt.write(line) 23 | -------------------------------------------------------------------------------- /chap9/make_noises.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import datetime 4 | import cv2 5 | 6 | from multiprocessing import Process, cpu_count 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | H_IMG, W_IMG = 100, 100 12 | SAMPLE_SIZE = 70000 13 | SAMPLES_DIR = 'samples' 14 | 15 | def make_noise(index): 16 | h = np.random.randint(1, H_IMG) 17 | w = np.random.randint(1, W_IMG) 18 | noise = np.random.random((h, w)) 19 | noisy_img = cv2.resize(noise, (H_IMG, W_IMG), interpolation=cv2.INTER_CUBIC) 20 | fx = float(w) / float(W_IMG) 21 | fy = float(h) / float(H_IMG) 22 | filename = '{}/{:0>5d}_{}_{}.jpg'.format(SAMPLES_DIR, index, fx, fy) 23 | plt.imsave(filename, noisy_img, cmap='gray') 24 | 25 | def make_noises(i0, i1): 26 | np.random.seed(datetime.datetime.now().microsecond) 27 | for i in xrange(i0, i1): 28 | make_noise(i) 29 | print('Noises from {} to {} are made!'.format(i0+1, i1)) 30 | sys.stdout.flush() 31 | 32 | def main(): 33 | cmd = 'mkdir -p {}'.format(SAMPLES_DIR) 34 | os.system(cmd) 35 | n_procs = cpu_count() 36 | 37 | print('Making noises with {} processes ...'.format(n_procs)) 38 | length = float(SAMPLE_SIZE)/float(n_procs) 39 | indices = [int(round(i * length)) for i in range(n_procs + 1)] 40 | processes = [Process(target=make_noises, args=(indices[i], indices[i+1])) for i in range(n_procs)] 41 | 42 | for p in processes: 43 | p.start() 44 | 45 | for p in processes: 46 | p.join() 47 | 48 | print('Done!') 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /chap9/predict.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | sys.path.append('/path/to/caffe/python') 4 | import caffe 5 | 6 | WEIGHTS_FILE = 'freq_regression_iter_10000.caffemodel' 7 | DEPLOY_FILE = 'deploy.prototxt' 8 | MEAN_VALUE = 128 9 | 10 | #caffe.set_mode_cpu() 11 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST) 12 | 13 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) 14 | transformer.set_transpose('data', (2,0,1)) 15 | transformer.set_mean('data', np.array([MEAN_VALUE])) 16 | transformer.set_raw_scale('data', 255) 17 | 18 | image_list = sys.argv[1] 19 | 20 | batch_size = net.blobs['data'].data.shape[0] 21 | with open(image_list, 'r') as f: 22 | i = 0 23 | filenames = [] 24 | for line in f.readlines(): 25 | filename = line[:-1] 26 | filenames.append(filename) 27 | image = caffe.io.load_image(filename, False) 28 | transformed_image = transformer.preprocess('data', image) 29 | net.blobs['data'].data[i, ...] 
= transformed_image 30 | i += 1 31 | 32 | if i == batch_size: 33 | output = net.forward() 34 | freqs = output['pred'] 35 | 36 | for filename, (fx, fy) in zip(filenames, freqs): 37 | print('Predicted frequencies for {} is {:.2f} and {:.2f}'.format(filename, fx, fy)) 38 | 39 | i = 0 40 | filenames = [] 41 | -------------------------------------------------------------------------------- /chap9/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "./train_val.prototxt" 2 | test_iter: 200 3 | test_interval: 1000 4 | base_lr: 0.01 5 | lr_policy: "step" 6 | gamma: 0.707 7 | stepsize: 2000 8 | display: 100 9 | max_iter: 10000 10 | momentum: 0.9 11 | weight_decay: 0.00001 12 | snapshot_prefix: "./freq_regression" 13 | solver_mode: GPU 14 | type: "Nesterov" 15 | -------------------------------------------------------------------------------- /chap9/train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "RegressionExample" 2 | layer { 3 | name: "data" 4 | type: "HDF5Data" 5 | top: "data" 6 | top: "freq" 7 | include { 8 | phase: TRAIN 9 | } 10 | hdf5_data_param { 11 | source: "train_h5.txt" 12 | batch_size: 50 13 | } 14 | } 15 | layer { 16 | name: "data" 17 | type: "HDF5Data" 18 | top: "data" 19 | top: "freq" 20 | include { 21 | phase: TEST 22 | } 23 | hdf5_data_param { 24 | source: "val_h5.txt" 25 | batch_size: 50 26 | } 27 | } 28 | layer { 29 | name: "conv1" 30 | type: "Convolution" 31 | bottom: "data" 32 | top: "conv1" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | param { 38 | lr_mult: 1 39 | decay_mult: 0 40 | } 41 | convolution_param { 42 | num_output: 96 43 | kernel_size: 5 44 | stride: 2 45 | weight_filler { 46 | type: "gaussian" 47 | std: 0.01 48 | } 49 | bias_filler { 50 | type: "constant" 51 | value: 0 52 | } 53 | } 54 | } 55 | layer { 56 | name: "relu1" 57 | type: "ReLU" 58 | bottom: "conv1" 59 | top: "conv1" 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 3 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "conv2" 74 | type: "Convolution" 75 | bottom: "pool1" 76 | top: "conv2" 77 | param { 78 | lr_mult: 1 79 | decay_mult: 1 80 | } 81 | param { 82 | lr_mult: 1 83 | decay_mult: 0 84 | } 85 | convolution_param { 86 | num_output: 96 87 | pad: 2 88 | kernel_size: 3 89 | weight_filler { 90 | type: "gaussian" 91 | std: 0.01 92 | } 93 | bias_filler { 94 | type: "constant" 95 | value: 0 96 | } 97 | } 98 | } 99 | layer { 100 | name: "relu2" 101 | type: "ReLU" 102 | bottom: "conv2" 103 | top: "conv2" 104 | } 105 | layer { 106 | name: "pool2" 107 | type: "Pooling" 108 | bottom: "conv2" 109 | top: "pool2" 110 | pooling_param { 111 | pool: MAX 112 | kernel_size: 3 113 | stride: 2 114 | } 115 | } 116 | layer { 117 | name: "conv3" 118 | type: "Convolution" 119 | bottom: "pool2" 120 | top: "conv3" 121 | param { 122 | lr_mult: 1 123 | decay_mult: 1 124 | } 125 | param { 126 | lr_mult: 1 127 | decay_mult: 0 128 | } 129 | convolution_param { 130 | num_output: 128 131 | pad: 1 132 | kernel_size: 3 133 | weight_filler { 134 | type: "gaussian" 135 | std: 0.01 136 | } 137 | bias_filler { 138 | type: "constant" 139 | value: 0 140 | } 141 | } 142 | } 143 | layer { 144 | name: "relu3" 145 | type: "ReLU" 146 | bottom: "conv3" 147 | top: "conv3" 148 | } 149 | layer { 150 | name: "pool3" 151 | type: "Pooling" 152 | bottom: "conv3" 153 | top: "pool3" 154 | pooling_param { 155 | pool: MAX 156 
| kernel_size: 3 157 | stride: 2 158 | } 159 | } 160 | layer { 161 | name: "fc4" 162 | type: "InnerProduct" 163 | bottom: "pool3" 164 | top: "fc4" 165 | param { 166 | lr_mult: 1 167 | decay_mult: 1 168 | } 169 | param { 170 | lr_mult: 1 171 | decay_mult: 0 172 | } 173 | inner_product_param { 174 | num_output: 192 175 | weight_filler { 176 | type: "gaussian" 177 | std: 0.005 178 | } 179 | bias_filler { 180 | type: "constant" 181 | value: 0 182 | } 183 | } 184 | } 185 | layer { 186 | name: "relu4" 187 | type: "ReLU" 188 | bottom: "fc4" 189 | top: "fc4" 190 | } 191 | layer { 192 | name: "drop4" 193 | type: "Dropout" 194 | bottom: "fc4" 195 | top: "fc4" 196 | dropout_param { 197 | dropout_ratio: 0.35 198 | } 199 | } 200 | layer { 201 | name: "fc5" 202 | type: "InnerProduct" 203 | bottom: "fc4" 204 | top: "fc5" 205 | param { 206 | lr_mult: 1 207 | decay_mult: 1 208 | } 209 | param { 210 | lr_mult: 1 211 | decay_mult: 0 212 | } 213 | inner_product_param { 214 | num_output: 2 215 | weight_filler { 216 | type: "gaussian" 217 | std: 0.005 218 | } 219 | bias_filler { 220 | type: "constant" 221 | value: 0 222 | } 223 | } 224 | } 225 | layer { 226 | name: "sigmoid5" 227 | type: "Sigmoid" 228 | bottom: "fc5" 229 | top: "pred" 230 | } 231 | layer { 232 | name: "loss" 233 | type: "EuclideanLoss" 234 | bottom: "pred" 235 | bottom: "freq" 236 | top: "loss" 237 | } 238 | -------------------------------------------------------------------------------- /chap9/visualize_conv1_kernels.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import cv2 5 | sys.path.append('/path/to/caffe/python') 6 | import caffe 7 | 8 | ZOOM_IN_SIZE = 50 9 | PAD_SIZE = 4 10 | 11 | WEIGHTS_FILE = 'freq_regression_iter_10000.caffemodel' 12 | DEPLOY_FILE = 'deploy.prototxt' 13 | 14 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST) 15 | kernels = net.params['conv1'][0].data 16 | 17 | kernels -= kernels.min() 18 | kernels /= kernels.max() 19 | 20 | zoomed_in_kernels = [] 21 | for kernel in kernels: 22 | zoomed_in_kernels.append(cv2.resize(kernel[0], (ZOOM_IN_SIZE, ZOOM_IN_SIZE), interpolation=cv2.INTER_NEAREST)) 23 | 24 | # plot the 96 conv1 kernels as an 8x12 grid of squares 25 | half_pad = PAD_SIZE // 2 # integer division: pad widths and slice indices must be ints 26 | padded_size = ZOOM_IN_SIZE+PAD_SIZE 27 | padding = ((0, 0), (half_pad, half_pad), (half_pad, half_pad)) 28 | 29 | padded_kernels = np.pad(zoomed_in_kernels, padding, 'constant', constant_values=1) 30 | padded_kernels = padded_kernels.reshape(8, 12, padded_size, padded_size).transpose(0, 2, 1, 3) 31 | kernels_img = padded_kernels.reshape((8*padded_size, 12*padded_size))[half_pad:-half_pad, half_pad: -half_pad] 32 | 33 | plt.imshow(kernels_img, cmap='gray', interpolation='nearest') 34 | plt.axis('off') 35 | 36 | plt.show() 37 | -------------------------------------------------------------------------------- /errata.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/errata.pdf -------------------------------------------------------------------------------- /random_bonus/README.md: -------------------------------------------------------------------------------- 1 | # Assorted content unrelated to the book itself, though possibly related in the techniques involved 2 | 3 | ## [Generating Adversarial Examples with Caffe](https://github.com/frombeijingwithlove/dlcv_for_beginners/blob/master/random_bonus/adversarial_example_caffe) 4 | ## [GAN and Conditional
GAN to Generate 2D Samples](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/random_bonus/gan_n_cgan_2d_example) 5 | ## [Generating Mosaics for NSFW Images with Yahoo!'s Open NSFW](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/random_bonus/generate_mosaic_for_porno_images) 6 | ## [A U-Net Implementation in PyTorch](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/random_bonus/image-segmentation(updating)) 7 | ## [Model Fusion in Caffe](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/random_bonus/multiple_models_fusion_caffe) 8 | 9 | -------------------------------------------------------------------------------- /random_bonus/adversarial_example_caffe/README.md: -------------------------------------------------------------------------------- 1 | ## Generating Adversarial Examples 2 | Blog (in Chinese): 3 | [Generating Adversarial Examples with Caffe](https://zhuanlan.zhihu.com/p/26122612) 4 | 5 | ## step 1 6 | > ./download-squeezenet-v1.0-weights.sh 7 | 8 | to download weights for SqueezeNet v1.0. 9 | 10 | ## step 2 11 | 12 | > python adversarial_example_demo.py little_white_dog.jpg 13 | 14 | to check the demo results. -------------------------------------------------------------------------------- /random_bonus/adversarial_example_caffe/adversarial_example_demo.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from operator import itemgetter 3 | import numpy 4 | from matplotlib import pyplot 5 | sys.path.append('/path/to/caffe/python') 6 | import caffe 7 | 8 | 9 | def make_n_test_adversarial_example( 10 | img, net, transformer, epsilon, 11 | data_blob='data', prob_blob='prob', 12 | label_index=None, top_k=5): 13 | 14 | # Load image & forward 15 | transformed_img = transformer.preprocess(data_blob, img) 16 | net.blobs[data_blob].data[0] = transformed_img 17 | net.forward() 18 | probs = [x for x in enumerate(net.blobs[prob_blob].data.flatten())] 19 | num_classes = len(probs) 20 | sorted_probs = sorted(probs, key=itemgetter(1), reverse=True) 21 | top_preds = sorted_probs[:top_k] 22 | pred = sorted_probs[0][0] 23 | 24 | # if label_index is set, 25 | # generate an adversarial example toward the label, 26 | # else 27 | # reduce the probability of the predicted label 28 | net.blobs[prob_blob].diff[...] = 0 29 | if type(label_index) is int and 0 <= label_index < num_classes: 30 | net.blobs[prob_blob].diff[0][label_index] = 1. 31 | else: 32 | net.blobs[prob_blob].diff[0][pred] = -1. 33 | 34 | # generate attack image with fast gradient sign method 35 | diffs = net.backward() 36 | diff_sign_mat = numpy.sign(diffs[data_blob]) 37 | adversarial_noise = epsilon * diff_sign_mat 38 | 39 | # clip exceeded values 40 | attack_hwc = transformer.deprocess(data_blob, transformed_img + adversarial_noise[0]) 41 | attack_hwc[attack_hwc > 1] = 1. 42 | attack_hwc[attack_hwc < 0] = 0.
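# note: deprocess() maps the perturbed blob back to an RGB image in [0, 1], so the clipping above keeps the adversarial example a valid image; it is then preprocess()-ed again below for the second forward pass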
43 | attack_img = transformer.preprocess(data_blob, attack_hwc) 44 | 45 | net.blobs[data_blob].data[0] = attack_img 46 | net.forward() 47 | probs = [x for x in enumerate(net.blobs[prob_blob].data.flatten())] 48 | sorted_probs = sorted(probs, key=itemgetter(1), reverse=True) 49 | top_attacked_preds = sorted_probs[:top_k] 50 | 51 | return attack_hwc, top_preds, top_attacked_preds 52 | 53 | 54 | def visualize_attack(title, original_img, attack_img, original_preds, attacked_preds, labels): 55 | pred = original_preds[0][0] 56 | attacked_pred = attacked_preds[0][0] 57 | k = len(original_preds) 58 | fig_name = '{}: {} to {}'.format(title, labels[pred], labels[attacked_pred]) 59 | 60 | pyplot.figure(fig_name) 61 | for img, plt0, plt1, preds in [ 62 | (original_img, 231, 234, original_preds), 63 | (attack_img, 233, 236, attacked_preds) 64 | ]: 65 | pyplot.subplot(plt0) 66 | pyplot.axis('off') 67 | pyplot.imshow(img) 68 | ax = pyplot.subplot(plt1) 69 | pyplot.axis('off') 70 | ax.set_xlim([0, 2]) 71 | bars = ax.barh(range(k-1, -1, -1), [x[1] for x in preds]) 72 | for i, bar in enumerate(bars): 73 | x_loc = bar.get_x() + bar.get_width() 74 | y_loc = k - i - 1 75 | label = labels[preds[i][0]] 76 | ax.text(x_loc, y_loc, '{}: {:.2f}%'.format(label, preds[i][1]*100)) 77 | 78 | pyplot.subplot(232) 79 | pyplot.axis('off') 80 | noise = attack_img - original_img 81 | pyplot.imshow(255 * noise) 82 | 83 | 84 | if __name__ == '__main__': 85 | # path to test image 86 | image_path = sys.argv[1] 87 | 88 | # model to attack 89 | model_definition = 'squeezenet-v1.0-deploy-with-force-backward.prototxt' 90 | model_weights = 'squeezenet_v1.0.caffemodel' 91 | channel_means = numpy.array([104., 117., 123.]) 92 | 93 | # initialize net 94 | net = caffe.Net(model_definition, model_weights, caffe.TEST) 95 | n_channels, height, width = net.blobs['data'].shape[-3:] 96 | net.blobs['data'].reshape(1, n_channels, height, width) 97 | 98 | # initialize transformer 99 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) 100 | transformer.set_transpose('data', (2, 0, 1)) 101 | transformer.set_mean('data', channel_means) 102 | transformer.set_raw_scale('data', 255) 103 | transformer.set_channel_swap('data', (2, 1, 0)) 104 | 105 | # load labels from imagenet synset words 106 | with open('synset_words.txt', 'r') as f: 107 | labels = [x.rstrip()[x.find(' '):].split(',')[0] for x in f.readlines()] 108 | 109 | # load image 110 | img = caffe.io.load_image(image_path) 111 | 112 | examples = [ 113 | (None, 1.0), # make an adversarial example to reduce the predicted probability 114 | (296, 1.0), # make an adversarial example toward ice bear(296) 115 | (9, 1.0), # make an adversarial example toward ostrich(9) 116 | (9, 2.0), # make an adversarial example toward ostrich(9) with stronger noise 117 | (9, 6.0), # make an adversarial example toward ostrich(9) with very strong noise 118 | (9, 18.0), # make an adversarial example toward ostrich(9) with overly strong noise 119 | (752, 1.0), # make an adversarial example toward racket(752) 120 | (752, 2.0), # make an adversarial example toward racket(752) with stronger noise 121 | (752, 6.0), # make an adversarial example toward racket(752) with very strong noise 122 | (752, 18.0), # make an adversarial example toward racket(752) with overly strong noise 123 | ] 124 | 125 | for i, (label_index, epsilon) in enumerate(examples): 126 | attack_img, original_preds, attacked_preds = \ 127 | make_n_test_adversarial_example(img, net, transformer, epsilon, label_index=label_index) 128 |
visualize_attack('example{}'.format(i), img, attack_img, original_preds, attacked_preds, labels) 129 | 130 | # try to make an adversarial example toward racket(752) with epsilon=0.1, iterate 10 times 131 | attack_img, original_preds, attacked_preds = \ 132 | make_n_test_adversarial_example(img, net, transformer, 0.1, label_index=752) 133 | for i in range(9): 134 | attack_img, _, attacked_preds = \ 135 | make_n_test_adversarial_example(attack_img, net, transformer, 0.1, label_index=752) 136 | visualize_attack('racket_iterative', img, attack_img, original_preds, attacked_preds, labels) 137 | 138 | pyplot.show() 139 | -------------------------------------------------------------------------------- /random_bonus/adversarial_example_caffe/download-squeezenet-v1.0-weights.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | wget https://github.com/DeepScale/SqueezeNet/raw/master/SqueezeNet_v1.0/squeezenet_v1.0.caffemodel 4 | 5 | -------------------------------------------------------------------------------- /random_bonus/adversarial_example_caffe/little_white_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/adversarial_example_caffe/little_white_dog.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/README.md: -------------------------------------------------------------------------------- 1 | ## Generative Adversarial Networks (GANs) with 2D Samples 2 | Blog (in Chinese): 3 | [A Small Example of Generating 2D Samples with GAN](https://zhuanlan.zhihu.com/p/27343585) 4 | Inspired by & based on [Dev Nag's GAN example](https://github.com/devnag/pytorch-generative-adversarial-networks): 5 | 1) Use the batch size instead of the cardinality to achieve better convergence; the original version actually feeds the discriminator a 100-dimensional (the default cardinality) Gaussian distribution, so its convergence is **BAD**. 6 | 2) Use 2D samples, with visualization of training. 7 | 3) Demo of conditional GAN. 8 | 4) GPU support. 9 | 10 | 11 | ## Introduction 12 | Play with GANs to generate 2D samples whose probability density function (PDF) you define with a grayscale image. 13 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/example_z.jpg) 14 | 15 | ## 2D Sampling 16 | > python sampler.py 17 | 18 | This demos 10,000 samples drawn from the PDF defined by a grayscale image.
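To draw samples from your own code, the sampler can also be called directly; a minimal sketch (using the bundled `inputs/binary.jpg` as the density image, mirroring the `__main__` block of sampler.py):

```python
from skimage import io
from sampler import generate_lut, sample_2d

# build the lookup tables from a grayscale density image, then draw samples
density_img = io.imread('inputs/binary.jpg', True)  # True -> load as grayscale
lut_2d = generate_lut(density_img)
samples = sample_2d(lut_2d, 10000)  # ndarray of shape (10000, 2), values in [0, 1]
```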
19 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/test_2d_sampling_batman.png) 20 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/test_2d_sampling_binary.png) 21 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/test_2d_sampling_triangle.png) 22 | 23 | ## GAN 24 | > python gan_demo.py inputs/zig.jpg 25 | 26 | Training is visualized as follows: 27 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_zig.gif) 28 | 29 | More examples: 30 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_Z.gif) 31 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_triangle.gif) 32 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_circle.gif) 33 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_random.gif) 34 | 35 | ## Conditional GAN 36 | For more complex distributions, a conditional GAN works much better. This demo reads distributions from multiple density images (PDFs), encoding the condition as a one-hot vector. 37 | 38 | > python cgan_demo.py inputs/binary 39 | 40 | Training is visualized as follows: 41 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/cgan_binary.gif) 42 | Compared to the vanilla GAN version: 43 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_binary.gif) 44 | 45 | More examples: 46 | Vortex with C-GAN 47 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/cgan_vortex.gif) 48 | 49 | Vortex with vanilla GAN 50 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_vortex.gif) 51 | 52 | Pentagram with C-GAN 53 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/cgan_penta.gif) 54 | 55 | Pentagram with vanilla GAN 56 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_penta.gif) 57 | 58 | ## Latent space dimensionality / model complexity / learning rates / ...
59 | > python gan_demo.py -h 60 | 61 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/argparser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch.optim as optim 4 | 5 | OPTIMIZERS = { 6 | 'adadelta': optim.Adadelta, 7 | 'adam': optim.Adam, 8 | 'rmsprop': optim.RMSprop, 9 | 'sgd': optim.SGD 10 | } 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser( 15 | description='A Simple Demo of Generative Adversarial Networks with 2D Samples', 16 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 17 | 18 | parser.add_argument('input_path', 19 | help='Image or directory containing images to define distribution') 20 | 21 | parser.add_argument('--z_dim', 22 | help='Dimensionality of latent space', 23 | type=int, default=2) 24 | parser.add_argument('--iterations', 25 | help='Num of training iterations', 26 | type=int, default=2000) 27 | parser.add_argument('--batch_size', 28 | help='Total batch size (split across conditions for C-GAN)', 29 | type=int, default=2000) 30 | parser.add_argument('--optimizer', 31 | help='Optimizer: Adadelta/Adam/RMSprop/SGD', 32 | type=str, default='Adadelta') 33 | parser.add_argument('--d_lr', 34 | help='Learning rate of discriminator, for Adadelta it is the base learning rate', 35 | type=float, default=1) 36 | parser.add_argument('--g_lr', 37 | help='Learning rate of generator, for Adadelta it is the base learning rate', 38 | type=float, default=1) 39 | parser.add_argument('--d_steps', 40 | help='Steps of the discriminator in each iteration', 41 | type=int, default=3) 42 | parser.add_argument('--g_steps', 43 | help='Steps of the generator in each iteration', 44 | type=int, default=1) 45 | parser.add_argument('--d_hidden_size', 46 | help='Num of hidden units in discriminator', 47 | type=int, default=100) 48 | parser.add_argument('--g_hidden_size', 49 | help='Num of hidden units in generator', 50 | type=int, default=50) 51 | parser.add_argument('--display_interval', 52 | help='Interval of iterations to display/export images', 53 | type=int, default=10) 54 | parser.add_argument('--no_display', 55 | help='Do not show plots during training', action='store_true') 56 | parser.add_argument('--export', 57 | help='Export images', action='store_true') 58 | parser.add_argument('--cpu', 59 | help='Set to CPU mode', action='store_true') 60 | 61 | args = parser.parse_args() 62 | args.input_path = args.input_path.rstrip(os.sep) 63 | args.optimizer = OPTIMIZERS[args.optimizer.lower()] 64 | 65 | return args 66 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/cgan_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Conditional Generative Adversarial Networks (C-GAN) example with 2D samples in PyTorch.
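# Layout of the conditional inputs used throughout this script:
#   generator input: [c_dim one-hot condition | z_dim latent vector]
#   discriminator input: [c_dim one-hot condition | 2-D sample]
# Each density image in input_path defines one condition; the batch is split
# across conditions in proportion to each image's total pixel mass (see c_indices).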
3 | import os 4 | import numpy 5 | from skimage import io 6 | import torch 7 | import torch.nn as nn 8 | from torch.autograd import Variable 9 | from sampler import generate_lut, sample_2d 10 | from visualizer import CGANDemoVisualizer 11 | from argparser import parse_args 12 | from networks import SimpleMLP 13 | 14 | DIMENSION = 2 15 | 16 | args = parse_args() 17 | cuda = False if args.cpu else True 18 | bs = args.batch_size 19 | z_dim = args.z_dim 20 | 21 | image_paths = [os.sep.join([args.input_path, x]) for x in os.listdir(args.input_path)] 22 | density_imgs = [io.imread(x, True) for x in image_paths] 23 | luts_2d = [generate_lut(x) for x in density_imgs] 24 | # Sampling is based on visual density; a batch size that is too small may make conditional training fail 25 | pix_sums = [numpy.sum(x) for x in density_imgs] 26 | total_pix_sums = numpy.sum(pix_sums) 27 | c_indices = [0] + [int(sum(pix_sums[:i+1])/total_pix_sums*bs+0.5) for i in range(len(pix_sums)-1)] + [bs] 28 | 29 | c_dim = len(luts_2d) # Dimensionality of condition labels <--> number of images 30 | 31 | visualizer = CGANDemoVisualizer('Conditional GAN 2D Example Visualization of {}'.format(args.input_path)) 32 | 33 | generator = SimpleMLP(input_size=z_dim+c_dim, hidden_size=args.g_hidden_size, output_size=DIMENSION) 34 | discriminator = SimpleMLP(input_size=DIMENSION+c_dim, hidden_size=args.d_hidden_size, output_size=1) 35 | 36 | if cuda: 37 | generator.cuda() 38 | discriminator.cuda() 39 | criterion = nn.BCELoss() 40 | 41 | d_optimizer = args.optimizer(discriminator.parameters(), lr=args.d_lr) 42 | g_optimizer = args.optimizer(generator.parameters(), lr=args.g_lr) 43 | 44 | y = numpy.zeros((bs, c_dim)) 45 | for i in range(c_dim): 46 | y[c_indices[i]:c_indices[i + 1], i] = 1 # conditional labels, one-hot encoding 47 | y = Variable(torch.Tensor(y)) 48 | if cuda: 49 | y = y.cuda() 50 | 51 | for train_iter in range(args.iterations): 52 | for d_index in range(args.d_steps): 53 | # 1.
Train D on real+fake 54 | discriminator.zero_grad() 55 | 56 | # 1A: Train D on real samples with conditions 57 | real_samples = numpy.zeros((bs, DIMENSION)) 58 | for i in range(c_dim): 59 | real_samples[c_indices[i]:c_indices[i+1], :] = sample_2d(luts_2d[i], c_indices[i+1]-c_indices[i]) 60 | 61 | # the first c_dim dimensions are the condition inputs, the last 2 dimensions are samples 62 | real_samples = Variable(torch.Tensor(real_samples)) 63 | if cuda: 64 | real_samples = real_samples.cuda() 65 | d_real_data = torch.cat([y, real_samples], 1) 66 | if cuda: 67 | d_real_data = d_real_data.cuda() 68 | d_real_decision = discriminator(d_real_data) 69 | labels = Variable(torch.ones(bs)) 70 | if cuda: 71 | labels = labels.cuda() 72 | d_real_loss = criterion(d_real_decision, labels) # ones = true 73 | 74 | # 1B: Train D on fake 75 | latent_samples = Variable(torch.randn(bs, z_dim)) 76 | if cuda: 77 | latent_samples = latent_samples.cuda() 78 | # the first c_dim dimensions are the condition inputs, the last z_dim dimensions are latent samples 79 | d_gen_input = torch.cat([y, latent_samples], 1) 80 | d_fake_data = generator(d_gen_input).detach() # detach to avoid training G on these labels 81 | conditional_d_fake_data = torch.cat([y, d_fake_data], 1) 82 | if cuda: 83 | conditional_d_fake_data = conditional_d_fake_data.cuda() 84 | d_fake_decision = discriminator(conditional_d_fake_data) 85 | labels = Variable(torch.zeros(bs)) 86 | if cuda: 87 | labels = labels.cuda() 88 | d_fake_loss = criterion(d_fake_decision, labels) # zeros = fake 89 | 90 | d_loss = d_real_loss + d_fake_loss 91 | d_loss.backward() 92 | 93 | d_optimizer.step() # Only optimizes D's parameters; changes based on stored gradients from backward() 94 | 95 | for g_index in range(args.g_steps): 96 | # 2. Train G on D's response (but DO NOT train D on these labels) 97 | generator.zero_grad() 98 | 99 | latent_samples = Variable(torch.randn(bs, z_dim)) 100 | if cuda: 101 | latent_samples = latent_samples.cuda() 102 | g_gen_input = torch.cat([y, latent_samples], 1) 103 | g_fake_data = generator(g_gen_input) 104 | conditional_g_fake_data = torch.cat([y, g_fake_data], 1) 105 | g_fake_decision = discriminator(conditional_g_fake_data) 106 | labels = Variable(torch.ones(bs)) 107 | if cuda: 108 | labels = labels.cuda() 109 | g_loss = criterion(g_fake_decision, labels) # we want to fool, so pretend it's all genuine 110 | 111 | g_loss.backward() 112 | g_optimizer.step() # Only optimizes G's parameters 113 | 114 | if train_iter % args.display_interval == 0: 115 | loss_d_real = d_real_loss.data.cpu().numpy()[0] if cuda else d_real_loss.data.numpy()[0] 116 | loss_d_fake = d_fake_loss.data.cpu().numpy()[0] if cuda else d_fake_loss.data.numpy()[0] 117 | loss_g = g_loss.data.cpu().numpy()[0] if cuda else g_loss.data.numpy()[0] 118 | 119 | msg = 'Iteration {}: D_loss(real/fake): {:.6g}/{:.6g} G_loss: {:.6g}'.format(train_iter, loss_d_real, loss_d_fake, loss_g) 120 | print(msg) 121 | 122 | real_samples_with_y = d_real_data.data.cpu().numpy() if cuda else d_real_data.data.numpy() 123 | gen_samples_with_y = conditional_g_fake_data.data.cpu().numpy() if cuda else conditional_g_fake_data.data.numpy() 124 | if args.no_display: 125 | visualizer.draw(real_samples_with_y, gen_samples_with_y, msg, show=False) 126 | else: 127 | visualizer.draw(real_samples_with_y, gen_samples_with_y, msg) 128 | 129 | if args.export: 130 | filename = args.input_path.split(os.sep)[-1] 131 | output_dir = 'cgan_training_{}'.format(filename) 132 | os.system('mkdir -p {}'.format(output_dir)) 133 |
export_filepath = os.sep.join([output_dir, 'iter_{:0>6d}.png'.format(train_iter)]) 134 | visualizer.savefig(export_filepath) 135 | 136 | if not args.no_display: 137 | visualizer.show() 138 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/gan_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Generative Adversarial Networks (GAN) example with 2D samples in PyTorch. 3 | import os 4 | from skimage import io 5 | import torch 6 | import torch.nn as nn 7 | from torch.autograd import Variable 8 | from sampler import generate_lut, sample_2d 9 | from visualizer import GANDemoVisualizer 10 | from argparser import parse_args 11 | from networks import SimpleMLP 12 | 13 | DIMENSION = 2 14 | 15 | args = parse_args() 16 | cuda = False if args.cpu else True 17 | bs = args.batch_size 18 | z_dim = args.z_dim 19 | 20 | density_img = io.imread(args.input_path, True) 21 | lut_2d = generate_lut(density_img) 22 | 23 | visualizer = GANDemoVisualizer('GAN 2D Example Visualization of {}'.format(args.input_path)) 24 | 25 | generator = SimpleMLP(input_size=z_dim, hidden_size=args.g_hidden_size, output_size=DIMENSION) 26 | discriminator = SimpleMLP(input_size=DIMENSION, hidden_size=args.d_hidden_size, output_size=1) 27 | 28 | if cuda: 29 | generator.cuda() 30 | discriminator.cuda() 31 | criterion = nn.BCELoss() 32 | 33 | d_optimizer = args.optimizer(discriminator.parameters(), lr=args.d_lr) 34 | g_optimizer = args.optimizer(generator.parameters(), lr=args.g_lr) 35 | 36 | for train_iter in range(args.iterations): 37 | for d_index in range(args.d_steps): 38 | # 1. Train D on real+fake 39 | discriminator.zero_grad() 40 | 41 | # 1A: Train D on real 42 | real_samples = sample_2d(lut_2d, bs) 43 | d_real_data = Variable(torch.Tensor(real_samples)) 44 | if cuda: 45 | d_real_data = d_real_data.cuda() 46 | d_real_decision = discriminator(d_real_data) 47 | labels = Variable(torch.ones(bs)) 48 | if cuda: 49 | labels = labels.cuda() 50 | d_real_loss = criterion(d_real_decision, labels) # ones = true 51 | 52 | # 1B: Train D on fake 53 | latent_samples = torch.randn(bs, z_dim) 54 | d_gen_input = Variable(latent_samples) 55 | if cuda: 56 | d_gen_input = d_gen_input.cuda() 57 | d_fake_data = generator(d_gen_input).detach() # detach to avoid training G on these labels 58 | d_fake_decision = discriminator(d_fake_data) 59 | labels = Variable(torch.zeros(bs)) 60 | if cuda: 61 | labels = labels.cuda() 62 | d_fake_loss = criterion(d_fake_decision, labels) # zeros = fake 63 | 64 | d_loss = d_real_loss + d_fake_loss 65 | d_loss.backward() 66 | 67 | d_optimizer.step() # Only optimizes D's parameters; changes based on stored gradients from backward() 68 | 69 | for g_index in range(args.g_steps): 70 | # 2.
Train G on D's response (but DO NOT train D on these labels) 71 | generator.zero_grad() 72 | 73 | latent_samples = torch.randn(bs, z_dim) 74 | g_gen_input = Variable(latent_samples) 75 | if cuda: 76 | g_gen_input = g_gen_input.cuda() 77 | g_fake_data = generator(g_gen_input) 78 | g_fake_decision = discriminator(g_fake_data) 79 | labels = Variable(torch.ones(bs)) 80 | if cuda: 81 | labels = labels.cuda() 82 | g_loss = criterion(g_fake_decision, labels) # we want to fool, so pretend it's all genuine 83 | 84 | g_loss.backward() 85 | g_optimizer.step() # Only optimizes G's parameters 86 | 87 | if train_iter % args.display_interval == 0: 88 | loss_d_real = d_real_loss.data.cpu().numpy()[0] if cuda else d_real_loss.data.numpy()[0] 89 | loss_d_fake = d_fake_loss.data.cpu().numpy()[0] if cuda else d_fake_loss.data.numpy()[0] 90 | loss_g = g_loss.data.cpu().numpy()[0] if cuda else g_loss.data.numpy()[0] 91 | 92 | msg = 'Iteration {}: D_loss(real/fake): {:.6g}/{:.6g} G_loss: {:.6g}'.format(train_iter, loss_d_real, loss_d_fake, loss_g) 93 | print(msg) 94 | 95 | gen_samples = g_fake_data.data.cpu().numpy() if cuda else g_fake_data.data.numpy() 96 | 97 | if args.no_display: 98 | visualizer.draw(real_samples, gen_samples, msg, show=False) 99 | else: 100 | visualizer.draw(real_samples, gen_samples, msg) 101 | 102 | if args.export: 103 | filename = args.input_path.split(os.sep)[-1] 104 | output_dir = 'gan_training_{}'.format(filename[:filename.rfind('.')]) 105 | os.system('mkdir -p {}'.format(output_dir)) 106 | export_filepath = os.sep.join([output_dir, 'iter_{:0>6d}.png'.format(train_iter)]) 107 | visualizer.savefig(export_filepath) 108 | 109 | if not args.no_display: 110 | visualizer.show() 111 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/U.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/U.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/Z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/Z.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/batman.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/batman.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/binary.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/binary.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/binary/0.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/binary/0.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/binary/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/binary/1.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/circle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/circle.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/dumbbell.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/dumbbell.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta/0.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta/1.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta/2.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta/3.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta/4.jpg 
-------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/random.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/random.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/triangle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/triangle.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/vortex.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/vortex.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/vortex/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/vortex/0.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/vortex/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/vortex/1.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/vortex/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/vortex/2.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/zig.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/zig.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/networks.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class SimpleMLP(nn.Module): 6 | def __init__(self, input_size, hidden_size, output_size): 7 | super(SimpleMLP, self).__init__() 8 | self.map1 = nn.Linear(input_size, hidden_size) 9 | self.map2 = nn.Linear(hidden_size, output_size) 10 | 11 | def forward(self, x): 12 | x = F.leaky_relu(self.map1(x), 0.1) 13 | return F.sigmoid(self.map2(x)) 14 | 15 | class DeepMLP(nn.Module): 16 | def __init__(self, input_size, hidden_size, output_size): 17 | super(DeepMLP, self).__init__() 18 | self.map1 = nn.Linear(input_size, hidden_size) 19 | self.map2 = nn.Linear(hidden_size, hidden_size) 20 | self.map3 = nn.Linear(hidden_size, output_size) 21 | 22 | def 
forward(self, x): 23 | x = F.leaky_relu(self.map1(x), 0.1) 24 | x = F.leaky_relu(self.map2(x), 0.1) 25 | return F.sigmoid(self.map3(x)) 26 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/sampler.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import numpy 3 | from skimage import transform 4 | 5 | EPS = 1e-66 6 | RESOLUTION = 0.001 7 | num_grids = int(1/RESOLUTION+0.5) 8 | 9 | def generate_lut(img): 10 | """ 11 | linear approximation of the marginal CDF in y and the per-row conditional CDFs in x 12 | :param img: grayscale image defining the density 13 | :return: y_lut, x_luts 14 | """ 15 | density_img = transform.resize(img, (num_grids, num_grids)) 16 | x_accumulation = numpy.sum(density_img, axis=1) 17 | sum_xy = numpy.sum(x_accumulation) 18 | y_cdf_of_accumulated_x = [[0., 0.]] 19 | accumulated = 0 20 | for ir, i in enumerate(range(num_grids-1, -1, -1)): 21 | accumulated += x_accumulation[i] 22 | if accumulated == 0: 23 | y_cdf_of_accumulated_x[0][0] = float(ir+1)/float(num_grids) 24 | elif EPS < accumulated < sum_xy - EPS: 25 | y_cdf_of_accumulated_x.append([float(ir+1)/float(num_grids), accumulated/sum_xy]) 26 | else: 27 | break 28 | y_cdf_of_accumulated_x.append([float(ir+1)/float(num_grids), 1.]) 29 | y_cdf_of_accumulated_x = numpy.array(y_cdf_of_accumulated_x) 30 | 31 | x_cdfs = [] 32 | for j in range(num_grids): 33 | x_freq = density_img[num_grids-j-1] 34 | sum_x = numpy.sum(x_freq) 35 | x_cdf = [[0., 0.]] 36 | accumulated = 0 37 | for i in range(num_grids): 38 | accumulated += x_freq[i] 39 | if accumulated == 0: 40 | x_cdf[0][0] = float(i+1) / float(num_grids) 41 | elif EPS < accumulated < sum_xy - EPS: 42 | x_cdf.append([float(i+1)/float(num_grids), accumulated/sum_x]) 43 | else: 44 | break 45 | x_cdf.append([float(i+1)/float(num_grids), 1.]) 46 | if accumulated > EPS: 47 | x_cdf = numpy.array(x_cdf) 48 | x_cdfs.append(x_cdf) 49 | else: 50 | x_cdfs.append(None) 51 | 52 | y_lut = partial(numpy.interp, xp=y_cdf_of_accumulated_x[:, 1], fp=y_cdf_of_accumulated_x[:, 0]) 53 | x_luts = [partial(numpy.interp, xp=x_cdfs[i][:, 1], fp=x_cdfs[i][:, 0]) if x_cdfs[i] is not None else None for i in range(num_grids)] 54 | 55 | return y_lut, x_luts 56 | 57 | def sample_2d(lut, N): 58 | y_lut, x_luts = lut 59 | u_rv = numpy.random.random((N, 2)) 60 | samples = numpy.zeros(u_rv.shape) 61 | for i, (x, y) in enumerate(u_rv): 62 | ys = y_lut(y) 63 | x_bin = int(ys/RESOLUTION) 64 | xs = x_luts[x_bin](x) 65 | samples[i][0] = xs 66 | samples[i][1] = ys 67 | 68 | return samples 69 | 70 | if __name__ == '__main__': 71 | from skimage import io 72 | density_img = io.imread('inputs/random.jpg', True) 73 | lut_2d = generate_lut(density_img) 74 | samples = sample_2d(lut_2d, 10000) 75 | 76 | from matplotlib import pyplot 77 | fig, (ax0, ax1) = pyplot.subplots(ncols=2, figsize=(9, 4)) 78 | fig.canvas.set_window_title('Test 2D Sampling') 79 | ax0.imshow(density_img, cmap='gray') 80 | ax0.xaxis.set_major_locator(pyplot.NullLocator()) 81 | ax0.yaxis.set_major_locator(pyplot.NullLocator()) 82 | 83 | ax1.axis('equal') 84 | ax1.axis([0, 1, 0, 1]) 85 | ax1.plot(samples[:, 0], samples[:, 1], 'k,') 86 | pyplot.show() 87 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/visualizer.py: -------------------------------------------------------------------------------- 1 | from itertools import cycle 2 | import numpy 3 | from matplotlib import pyplot 4 | from skimage import filters 5 | 6 | 7 |
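# The visualizers below draw three panels per update: a scatter plot of real
# vs. generated samples, plus two density maps obtained by binning samples into
# an l_kde x l_kde histogram and smoothing it with a Gaussian filter of
# bandwidth bw_kde (a cheap stand-in for kernel density estimation).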
class GANDemoVisualizer: 8 | 9 | def __init__(self, title, l_kde=100, bw_kde=5): 10 | self.title = title 11 | self.l_kde = l_kde 12 | self.resolution = 1. / self.l_kde 13 | self.bw_kde_ = bw_kde 14 | self.fig, self.axes = pyplot.subplots(ncols=3, figsize=(13.5, 4)) 15 | self.fig.canvas.set_window_title(self.title) 16 | 17 | def draw(self, real_samples, gen_samples, msg=None, cmap='hot', pause_time=0.05, max_sample_size=500, show=True): 18 | if msg: 19 | self.fig.suptitle(msg) 20 | ax0, ax1, ax2 = self.axes 21 | 22 | self.draw_samples(ax0, 'real and generated samples', real_samples, gen_samples, max_sample_size) 23 | self.draw_density_estimation(ax1, 'density: real samples', real_samples, cmap) 24 | self.draw_density_estimation(ax2, 'density: generated samples', gen_samples, cmap) 25 | 26 | if show: 27 | pyplot.draw() 28 | pyplot.pause(pause_time) 29 | 30 | @staticmethod 31 | def draw_samples(axis, title, real_samples, generated_samples, max_sample_size): 32 | axis.clear() 33 | axis.set_xlabel(title) 34 | axis.plot(generated_samples[:max_sample_size, 0], generated_samples[:max_sample_size, 1], '.') 35 | axis.plot(real_samples[:max_sample_size, 0], real_samples[:max_sample_size, 1], 'kx') 36 | axis.axis('equal') 37 | axis.axis([0, 1, 0, 1]) 38 | 39 | def draw_density_estimation(self, axis, title, samples, cmap): 40 | axis.clear() 41 | axis.set_xlabel(title) 42 | density_estimation = numpy.zeros((self.l_kde, self.l_kde)) 43 | for x, y in samples: 44 | if 0 < x < 1 and 0 < y < 1: 45 | density_estimation[int((1-y) / self.resolution)][int(x / self.resolution)] += 1 46 | density_estimation = filters.gaussian(density_estimation, self.bw_kde_) 47 | axis.imshow(density_estimation, cmap=cmap) 48 | axis.xaxis.set_major_locator(pyplot.NullLocator()) 49 | axis.yaxis.set_major_locator(pyplot.NullLocator()) 50 | 51 | def savefig(self, filepath): 52 | self.fig.savefig(filepath) 53 | 54 | @staticmethod 55 | def show(): 56 | pyplot.show() 57 | 58 | 59 | class CGANDemoVisualizer(GANDemoVisualizer): 60 | 61 | def __init__(self, title, l_kde=100, bw_kde=5): 62 | GANDemoVisualizer.__init__(self, title, l_kde, bw_kde) 63 | 64 | def draw(self, real_samples, gen_samples, msg=None, cmap='hot', pause_time=0.05, max_sample_size=500, show=True): 65 | if msg: 66 | self.fig.suptitle(msg) 67 | ax0, ax1, ax2 = self.axes 68 | 69 | self.draw_samples(ax0, 'real and generated samples', real_samples, gen_samples, max_sample_size) 70 | self.draw_density_estimation(ax1, 'density: real samples', real_samples[:, -2:], cmap) 71 | self.draw_density_estimation(ax2, 'density: generated samples', gen_samples[:, -2:], cmap) 72 | 73 | if show: 74 | pyplot.draw() 75 | pyplot.pause(pause_time) 76 | 77 | def draw_samples(self, axis, title, real_samples, generated_samples, max_sample_size): 78 | axis.clear() 79 | axis.set_xlabel(title) 80 | g_samples = numpy.copy(generated_samples) 81 | r_samples = numpy.copy(real_samples) 82 | numpy.random.shuffle(g_samples) 83 | numpy.random.shuffle(r_samples) 84 | g_samples = g_samples[:max_sample_size, :] 85 | r_samples = r_samples[:max_sample_size, :] 86 | color_iter = cycle('bgrcmy') 87 | for i in range(g_samples.shape[1]-2): 88 | c = next(color_iter) 89 | samples = g_samples[g_samples[:, i] > 0, :][:, -2:] 90 | axis.plot(samples[:, 0], samples[:, 1], c+'.', markersize=5) 91 | samples = r_samples[r_samples[:, i] > 0, :][:, -2:] 92 | axis.plot(samples[:, 0], samples[:, 1], c+'x', markersize=5) 93 | axis.axis('equal') 94 | axis.axis([0, 1, 0, 1]) 95 | 96 | def savefig(self, filepath): 97 | 
self.fig.savefig(filepath) 98 | 99 | @staticmethod 100 | def show(): 101 | pyplot.show() 102 | -------------------------------------------------------------------------------- /random_bonus/generate_mosaic_for_porno_images/README.md: -------------------------------------------------------------------------------- 1 | ## Generating Mosaics for NSFW Images 2 | Blog (in Chinese): 3 | [Improving Driving Skills: Removing Mosaics and Clothes from Adult Videos with GAN](https://zhuanlan.zhihu.com/p/27199954) 4 | 5 | ## step 1 6 | Download Yahoo's open_nsfw (Not Safe For Work) model: 7 | > ./clone_open_nsfw.sh 8 | 9 | **Note**: in the original open_nsfw, global pooling is performed by the last pooling layer with a kernel size of 7; to support variable input sizes and make the mosaic finer, deploy_global_pooling.prototxt changes 10 | > kernel_size: 7 11 | 12 | to 13 | > global_pooling: true 14 | 15 | ## step 2 16 | 17 | > python gen_mosaic.py [input dir] [output dir] 18 | 19 | The code is adapted from the activation visualization in Chapter 10: [visualize_activation.py](https://github.com/frombeijingwithlove/dlcv_for_beginners/blob/master/chap10/visualize_activation.py). 20 | 21 | ## step 3 (optional) 22 | If the images for pix2pix training have a large aspect ratio and the NSFW regions mostly lie near the image center, consider center-cropping and resizing them to 256x256: 23 | > python crop_n_resize.py [dir_0] [dir_1] ... [dir_n] 256 24 | -------------------------------------------------------------------------------- /random_bonus/generate_mosaic_for_porno_images/clone_open_nsfw.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | git clone https://github.com/yahoo/open_nsfw.git 3 | -------------------------------------------------------------------------------- /random_bonus/generate_mosaic_for_porno_images/crop_n_resize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import sys 4 | 5 | folders = sys.argv[1:-1] 6 | length = int(sys.argv[-1]) 7 | 8 | cnt = 0 9 | for folder in folders: 10 | print('scanning {} ...'.format(folder)) 11 | folder = folder.rstrip('/') 12 | files = os.listdir(folder) 13 | 14 | for img_file in files: 15 | filepath = '{}/{}'.format(folder, img_file) 16 | try: 17 | img = cv2.imread(filepath) 18 | h, w, c = img.shape 19 | except Exception: 20 | print('problematic file:', filepath) 21 | continue 22 | 23 | if img is None: 24 | print('problematic file:', filepath) 25 | continue 26 | elif h == length and w == length: 27 | continue 28 | else: 29 | if h > w: 30 | dl = int((h-w)/2) 31 | if dl > 0: 32 | img = img[dl:-dl, ...] 33 | else: 34 | dl = int((w-h)/2) 35 | if dl > 0: 36 | img = img[:, dl:-dl, ...]
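# the center crop above leaves height and width equal (up to one pixel of rounding), so the resize below no longer distorts the aspect ratio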
37 | img = cv2.resize(img, (length, length)) 38 | cv2.imwrite(filepath, img) 39 | 40 | cnt += 1 41 | if cnt % 100 == 0: 42 | print('{} images processed!'.format(cnt)) 43 | 44 | print('Done!') 45 | 46 | -------------------------------------------------------------------------------- /random_bonus/generate_mosaic_for_porno_images/gen_mosaic.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import numpy as np 4 | import cv2 5 | sys.path.append('/path/to/caffe/python') 6 | import caffe 7 | 8 | WEIGHTS_FILE = 'open_nsfw/nsfw_model/resnet_50_1by2_nsfw.caffemodel' 9 | DEPLOY_FILE = 'deploy_global_pooling.prototxt' 10 | FEATURE_MAPS = 'eltwise_stage3_block2' 11 | FC_LAYER = 'fc_nsfw' 12 | 13 | SHORT_EDGE = 320 14 | MOSAIC_RANGE = [5, 15] 15 | 16 | #caffe.set_mode_cpu() 17 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST) 18 | input_dir = sys.argv[1] 19 | output_dir = sys.argv[2] 20 | os.system('mkdir -p {}'.format(output_dir)) 21 | 22 | porno = 1 23 | mask_th = 0.5 24 | 25 | filenames = os.listdir(input_dir) 26 | for i, filename in enumerate(filenames): 27 | filepath = os.sep.join([input_dir, filename]) 28 | 29 | image = cv2.imread(filepath)[:, :, :3] 30 | height, width = image.shape[:2] 31 | 32 | short_edge_image = min(image.shape[:2]) 33 | scale_ratio = float(SHORT_EDGE) / float(short_edge_image) 34 | if scale_ratio < 1: 35 | transformed_image = cv2.resize(image, (0, 0), fx=scale_ratio, fy=scale_ratio) 36 | else: 37 | transformed_image = np.copy(image) 38 | transformed_image = transformed_image.astype(np.float32) 39 | transformed_image -= np.array([104., 117., 123.]) 40 | transformed_image = np.transpose(transformed_image, (2, 0, 1)) 41 | 42 | net.blobs['data'].reshape(1, 3, transformed_image.shape[1], transformed_image.shape[2]) 43 | net.blobs['data'].data[...] 
= transformed_image 44 | 45 | mosaic_size = np.random.randint(MOSAIC_RANGE[0], MOSAIC_RANGE[1]+1) # mosaic block size in [5, 15] 46 | scale_mosaic = 1 / float(mosaic_size) 47 | mosaic_image = cv2.resize(image, (0, 0), fx=scale_mosaic, fy=scale_mosaic) 48 | mosaic_image = cv2.resize(mosaic_image, (width, height), interpolation=cv2.INTER_NEAREST) 49 | 50 | net.forward() 51 | feature_maps = net.blobs[FEATURE_MAPS].data[0] 52 | fc_params = net.params[FC_LAYER] 53 | fc_w = fc_params[0].data[porno] 54 | 55 | activation_map = np.zeros_like(feature_maps[0]) 56 | for feature_map, w in zip(feature_maps, fc_w): 57 | activation_map += feature_map * w 58 | 59 | activation_map = cv2.resize(activation_map, (width, height), interpolation=cv2.INTER_CUBIC) 60 | activation_map -= activation_map.min() 61 | activation_map /= activation_map.max() 62 | mask = np.zeros(activation_map.shape) 63 | mask[activation_map > mask_th] = 1 64 | image_with_mosaic = np.copy(image) 65 | image_with_mosaic[mask > mask_th] = mosaic_image[mask > mask_th] 66 | 67 | output_filepath = os.sep.join([output_dir, filename]) 68 | cv2.imwrite(output_filepath, image_with_mosaic) 69 | 70 | if (i+1) % 100 == 0: 71 | print('{} images processed!'.format(i+1)) 72 | 73 | # uncomment the following for visualization 74 | #vis_img = np.hstack([image, image_with_mosaic]) 75 | #cv2.imshow('Mosaic Visualization', vis_img) 76 | #cv2.waitKey() 77 | 78 | print('Done!') 79 | 80 | -------------------------------------------------------------------------------- /random_bonus/great-circle-interp/README.md: -------------------------------------------------------------------------------- 1 | ## Interpolating Latent Space of GAN with Great Circle 2 | 3 | If you can read Chinese, please refer to [行走在GAN的Latent Space (Walking in the Latent Space of GAN)](https://zhuanlan.zhihu.com/p/32135185) 4 | 5 | The pretrained model for generating comic avatars can be downloaded at: https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/great-circle-interp/netG_epoch_49.pth -------------------------------------------------------------------------------- /random_bonus/great-circle-interp/distance-experiment.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from matplotlib import pyplot 3 | 4 | 5 | def dist_o2l(p1, p2): 6 | # distance from origin to the line defined by (p1, p2) 7 | p12 = p2 - p1 8 | u12 = p12 / numpy.linalg.norm(p12) 9 | l_pp = numpy.dot(-p1, u12) 10 | pp = l_pp*u12 + p1 11 | return numpy.linalg.norm(pp) 12 | 13 | dim = 100 14 | N = 100000 15 | 16 | rvs = [] 17 | dists2l = [] 18 | for i in range(N): 19 | u = numpy.random.randn(dim) 20 | v = numpy.random.randn(dim) 21 | rvs.extend([u, v]) 22 | dists2l.append(dist_o2l(u, v)) 23 | 24 | dists = [numpy.linalg.norm(x) for x in rvs] 25 | 26 | print('Distances to samples, mean: {}, std: {}'.format(numpy.mean(dists), numpy.std(dists))) 27 | print('Distances to lines, mean: {}, std: {}'.format(numpy.mean(dists2l), numpy.std(dists2l))) 28 | 29 | fig, (ax0, ax1) = pyplot.subplots(ncols=2, figsize=(11, 5)) 30 | ax0.hist(dists, 100, normed=1, color='g') 31 | ax1.hist(dists2l, 100, normed=1, color='b') 32 | pyplot.show() 33 | -------------------------------------------------------------------------------- /random_bonus/great-circle-interp/latent-walk-great-circle.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import os 4 | import numpy 5 | from scipy.stats import chi 6 | import
torch.utils.data 7 | from torch.autograd import Variable 8 | from networks import NetG 9 | from PIL import Image 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector') 13 | parser.add_argument('--niter', type=int, default=10, help='how many paths') 14 | parser.add_argument('--n_steps', type=int, default=23, help='steps to walk') 15 | parser.add_argument('--ngf', type=int, default=64) 16 | parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use') 17 | parser.add_argument('--netG', default='netG_epoch_49.pth', help="trained params for G") 18 | 19 | opt = parser.parse_args() 20 | output_dir = 'gcircle-walk' 21 | os.system('mkdir -p {}'.format(output_dir)) 22 | print(opt) 23 | 24 | ngpu = int(opt.ngpu) 25 | nz = int(opt.nz) 26 | ngf = int(opt.ngf) 27 | nc = 3 28 | 29 | netG = NetG(ngf, nz, nc, ngpu) 30 | netG.load_state_dict(torch.load(opt.netG, map_location=lambda storage, loc: storage)) 31 | netG.eval() 32 | print(netG) 33 | 34 | for j in range(opt.niter): 35 | # step 1: sample the radius from the chi distribution with nz degrees of freedom 36 | r = chi.rvs(df=nz) 37 | 38 | # step 2: build an orthonormal pair (u, v) spanning a random 2D plane 39 | u = numpy.random.normal(0, 1, nz) 40 | w = numpy.random.normal(0, 1, nz) 41 | u /= numpy.linalg.norm(u) 42 | w /= numpy.linalg.norm(w) 43 | 44 | v = w - numpy.dot(u, w) * u 45 | v /= numpy.linalg.norm(v) 46 | 47 | ndimgs = [] 48 | for i in range(opt.n_steps): 49 | t = float(i) / float(opt.n_steps) 50 | # step 3: walk the great circle of radius r in that plane 51 | z = numpy.cos(t * 2 * numpy.pi) * u + numpy.sin(t * 2 * numpy.pi) * v 52 | z *= r 53 | 54 | noise_t = z.reshape((1, nz, 1, 1)) 55 | noise_t = torch.FloatTensor(noise_t) 56 | noisev = Variable(noise_t) 57 | fake = netG(noisev) 58 | timg = fake[0] 59 | timg = timg.data 60 | 61 | timg.add_(1).div_(2) 62 | ndimg = timg.mul(255).clamp(0, 255).byte().permute(1, 2, 0).numpy() 63 | ndimgs.append(ndimg) 64 | 65 | print('exporting {} ...'.format(j)) 66 | ndimg = numpy.hstack(ndimgs) 67 | 68 | im = Image.fromarray(ndimg) 69 | filename = os.sep.join([output_dir, 'gc-{:0>6d}.png'.format(j)]) 70 | im.save(filename) 71 | -------------------------------------------------------------------------------- /random_bonus/great-circle-interp/latent-walk-slerp-vs-lerp.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import numpy 4 | import torch.utils.data 5 | from torch.autograd import Variable 6 | import networks 7 | from PIL import Image 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector') 11 | parser.add_argument('--n_samples', type=int, default=10, help='how many images') 12 | parser.add_argument('--n_steps', type=int, default=11, help='steps for interpolation') 13 | parser.add_argument('--ngf', type=int, default=64) 14 | parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use') 15 | parser.add_argument('--netG', default='netG_epoch_49.pth', help="path to netG") 16 | 17 | opt = parser.parse_args() 18 | print(opt) 19 | 20 | ngpu = int(opt.ngpu) 21 | nz = int(opt.nz) 22 | ngf = int(opt.ngf) 23 | nc = 3 24 | 25 | netG = networks.NetG(ngf, nz, nc, ngpu) 26 | netG.eval() 27 | netG.load_state_dict(torch.load(opt.netG, map_location=lambda storage, loc: storage)) 28 | print(netG) 29 | 30 | n_steps = opt.n_steps 31 | for epoch in range(opt.n_samples): 32 | u = numpy.random.randn(nz) 33 | v = numpy.random.randn(nz) 34 | lu = numpy.linalg.norm(u) 35 | lv = numpy.linalg.norm(v) 36 | theta =
numpy.arccos(numpy.dot(u, v)/lu/lv) 37 | 38 | ndimgs_slerp = [] 39 | ndimgs_lerp = [] 40 | for i in range(n_steps+1): 41 | t = float(i) / float(n_steps) 42 | 43 | # slerp 44 | z_slerp = numpy.sin((1 - t) * theta) / numpy.sin(theta) * u + numpy.sin(t * theta) / numpy.sin(theta) * v 45 | 46 | noise_t = z_slerp.reshape((1, nz, 1, 1)) 47 | noise_t = torch.FloatTensor(noise_t) 48 | noisev = Variable(noise_t) 49 | fake = netG(noisev) 50 | timg = fake[0] 51 | timg = timg.data 52 | 53 | timg.add_(1).div_(2) 54 | ndimg = timg.mul(255).clamp(0, 255).byte().permute(1, 2, 0).numpy() 55 | ndimgs_slerp.append(ndimg) 56 | 57 | # lerp 58 | z_lerp = (1 - t) * u + t * v 59 | 60 | noise_t = z_lerp.reshape((1, nz, 1, 1)) 61 | noise_t = torch.FloatTensor(noise_t) 62 | noisev = Variable(noise_t) 63 | fake = netG(noisev) 64 | timg = fake[0] 65 | timg = timg.data 66 | 67 | timg.add_(1).div_(2) 68 | ndimg = timg.mul(255).clamp(0, 255).byte().permute(1, 2, 0).numpy() 69 | ndimgs_lerp.append(ndimg) 70 | 71 | print('exporting {} ...'.format(epoch)) 72 | 73 | # export slerp result 74 | ndimg = numpy.hstack(ndimgs_slerp) 75 | im = Image.fromarray(ndimg) 76 | im.save('e{:0>3d}-slerp.png'.format(epoch)) 77 | 78 | # export lerp result 79 | ndimg = numpy.hstack(ndimgs_lerp) 80 | im = Image.fromarray(ndimg) 81 | im.save('e{:0>3d}-lerp.png'.format(epoch)) 82 | -------------------------------------------------------------------------------- /random_bonus/great-circle-interp/networks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.parallel 4 | import torch.backends.cudnn as cudnn 5 | 6 | 7 | class NetG(nn.Module): 8 | def __init__(self, ngf, nz, nc, ngpu): 9 | super(NetG, self).__init__() 10 | self.ngpu = ngpu 11 | self.main = nn.Sequential( 12 | # input is Z, going into a convolution 13 | nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False), 14 | nn.BatchNorm2d(ngf * 8), 15 | nn.ReLU(True), 16 | # state size. (ngf*8) x 4 x 4 17 | nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), 18 | nn.BatchNorm2d(ngf * 4), 19 | nn.ReLU(True), 20 | # state size. (ngf*4) x 8 x 8 21 | nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), 22 | nn.BatchNorm2d(ngf * 2), 23 | nn.ReLU(True), 24 | # state size. (ngf*2) x 16 x 16 25 | nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), 26 | nn.BatchNorm2d(ngf), 27 | nn.ReLU(True), 28 | # state size. (ngf) x 32 x 32 29 | nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False), 30 | nn.Tanh() 31 | # state size. (nc) x 64 x 64 32 | ) 33 | 34 | def forward(self, input): 35 | if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1: 36 | output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) 37 | else: 38 | output = self.main(input) 39 | return output 40 | 41 | 42 | class NetD(nn.Module): 43 | def __init__(self, ndf, nc, ngpu): 44 | super(NetD, self).__init__() 45 | self.ngpu = ngpu 46 | self.main = nn.Sequential( 47 | # input is (nc) x 64 x 64 48 | nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), 49 | nn.LeakyReLU(0.2, inplace=True), 50 | # state size. (ndf) x 32 x 32 51 | nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), 52 | nn.BatchNorm2d(ndf * 2), 53 | nn.LeakyReLU(0.2, inplace=True), 54 | # state size. (ndf*2) x 16 x 16 55 | nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), 56 | nn.BatchNorm2d(ndf * 4), 57 | nn.LeakyReLU(0.2, inplace=True), 58 | # state size. 
(ndf*4) x 8 x 8 59 | nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), 60 | nn.BatchNorm2d(ndf * 8), 61 | nn.LeakyReLU(0.2, inplace=True), 62 | # state size. (ndf*8) x 4 x 4 63 | nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), 64 | nn.Sigmoid() 65 | ) 66 | 67 | def forward(self, input): 68 | if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1: 69 | output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) 70 | else: 71 | output = self.main(input) 72 | 73 | return output.view(-1, 1).squeeze(1) 74 | -------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/README.md: -------------------------------------------------------------------------------- 1 | ## Image Segmentation with Simplified & Customizable U-Net & TriangleNet in PyTorch 2 | 3 | ### Step 1 4 | In the root folder, create "train" and "val" folders, each containing one folder with the images and one with their segmentations (in a lossless format, e.g. PNG). The default folder names are "images" for images and "segmentations" for segmentations; both can be overridden in the config file (see example.cfg). 5 | 6 | ### Step 2 7 | > python main.py train [path/to/root_folder] --config [path/to/configfile] 8 | 9 | ## TriangleNet 10 | TriangleNet compares predictions and labels at multiple scales during training; refer to networks.py for more details. -------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/argparser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | 5 | def parse_param_file(filepath): 6 | with open(filepath, 'r') as f: 7 | kw_exprs = [x.strip() for x in f.readlines() if x.strip()] 8 | return eval('dict({})'.format(','.join(kw_exprs))) 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Simple Demo of Image Segmentation with U-Net', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | 16 | # general options 17 | parser.add_argument('mode', 18 | help='train/test') 19 | parser.add_argument('dataroot', 20 | help='Directory containing training images in "images" and "segmentations" or test images') 21 | parser.add_argument('config', 22 | help='Path to config file') 23 | parser.add_argument('--cpu', 24 | help='Set to CPU mode', action='store_true') 25 | parser.add_argument('--output-dir', 26 | help='Directory of output for both train/test', 27 | type=str, default='') 28 | 29 | # test options 30 | parser.add_argument('--model', 31 | help='Path to pre-trained model', 32 | type=str, default='') 33 | 34 | args = parser.parse_args() 35 | 36 | params = { 37 | # general params 38 | 'network': 'triangle', 39 | 'layers': [32, 64, 128, 256, 512], 40 | 'groups': 1, 41 | 'color_labels': [], 42 | 'image_width': None, 43 | 'image_height': None 44 | } 45 | 46 | kwargs = parse_param_file(args.config) 47 | 48 | # other params specified in config file 49 | if args.mode == 'train': 50 | 51 | # default: no augmentation, with batch-norm 52 | 53 | train_params = { 54 | # training params 55 | 'optimizer': 'SGD', 56 | 'lr_policy': {0: 1e-4}, 57 | 'momentum': 0.9, 58 | 'nesterov': True, 59 | 'batch_norm': True, 60 | 'batch_size': 4, 61 | 'val_batch_size': None, 62 | 'epochs': 24, 63 | 'print_interval': 50, 64 | 'validation_interval': 1000, 65 | 'checkpoint_interval': 10000, 66 | 'random_horizontal_flip': False, 67 | 'random_square_crop': False, 68 | 'random_crop': None, # example: (0.81, 0.1) uses 0.81 as the area ratio and 0.1 as the h/w ratio variation 69 |
'random_rotation': 0, 70 | 'img_dir': 'images', 71 | 'seg_dir': 'segmentations', 72 | 'regression': False, 73 | } 74 | 75 | params.update(train_params) 76 | if params['val_batch_size'] is None: 77 | params['val_batch_size'] = params['batch_size'] 78 | 79 | # update params from config 80 | for k, v in kwargs.items(): 81 | if k in params: 82 | params[k] = v 83 | 84 | # set params to args 85 | for k, v in params.items(): 86 | setattr(args, k, v) 87 | 88 | args.dataroot = args.dataroot.rstrip(os.sep) 89 | 90 | return args 91 | -------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/example.cfg: -------------------------------------------------------------------------------- 1 | layers=[32, 64, 128, 256, 512] 2 | lr_policy={0: 1, 3: 5e-1} 3 | optimizer='Adadelta' 4 | batch_size=4 5 | epochs=100 6 | color_labels=[(i, i, i) for i in range(18)] 7 | image_width=256 8 | image_height=384 9 | random_horizontal_flip=True 10 | random_square_crop=False 11 | random_crop=(0.85, 0.1) 12 | random_rotation=5 13 | validation_interval=5000 14 | checkpoint_interval=10000 15 | print_interval=100 16 | seg_dir='profiles' 17 | -------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/loss_visualizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy 4 | from matplotlib import pyplot 5 | 6 | LOG_FILENAME = 'log.txt' 7 | TRAIN_LOSS_KEYWORD = '| Training loss: ' 8 | VAL_LOSS_KEYWORD = '| Validation loss: ' 9 | ITER_INDEX = 4 10 | LOSS_INDEX = -1 11 | MIOU_INDEX = -5 12 | MPA_INDEX = -9 13 | 14 | 15 | def parse_log(filepath): 16 | with open(filepath, 'r') as f: 17 | train_curve = [] 18 | val_curve = [] 19 | line = f.readline() 20 | while line: 21 | if TRAIN_LOSS_KEYWORD in line or VAL_LOSS_KEYWORD in line: 22 | tokens = line.split() 23 | measure = [int(tokens[ITER_INDEX]), float(tokens[LOSS_INDEX])] 24 | if TRAIN_LOSS_KEYWORD in line: 25 | train_curve.append(measure) 26 | else: 27 | measure.extend([float(tokens[MPA_INDEX]), float(tokens[MIOU_INDEX])]) 28 | val_curve.append(measure) 29 | 30 | line = f.readline() 31 | return train_curve, val_curve 32 | 33 | root_dir = sys.argv[1].rstrip(os.sep) 34 | keyword = sys.argv[2] if len(sys.argv) > 2 else None 35 | 36 | groups = [x for x in os.listdir(root_dir) if os.path.isdir(os.sep.join([root_dir, x])) and (keyword is None or keyword in x)] # os.listdir() returns bare names, so join with root_dir before the isdir() check 37 | 38 | for group in groups: 39 | log_path = os.sep.join([root_dir, group, LOG_FILENAME]) 40 | train_loss, val_loss = parse_log(log_path) 41 | train_loss = numpy.array(train_loss) 42 | val_loss = numpy.array(val_loss) 43 | pyplot.figure('Train/Test Loss Curves') 44 | pyplot.plot(train_loss[:, 0], train_loss[:, 1], label=group) 45 | pyplot.plot(val_loss[:, 0], val_loss[:, 1], '--', label=group) 46 | pyplot.figure('mPA/mIOU Curves') 47 | pyplot.plot(val_loss[:, 0], val_loss[:, 2], label='{}-mPA'.format(group)) 48 | pyplot.plot(val_loss[:, 0], val_loss[:, 3], '--', label='{}-mIOU'.format(group)) 49 | 50 | pyplot.figure('Train/Test Loss Curves') 51 | pyplot.legend(loc='upper right') 52 | pyplot.figure('mPA/mIOU Curves') 53 | pyplot.legend(loc='lower right') 54 | pyplot.show() 55 | -------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/networks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3
| import torch.nn.functional as F 4 | from torchvision.models import resnet 5 | from torchvision.models.resnet import conv3x3 6 | 7 | 8 | class UNetConvBlock(nn.Module): 9 | def __init__(self, input_nch, output_nch, kernel_size=3, activation=F.leaky_relu, use_bn=True, same_conv=True): 10 | super(UNetConvBlock, self).__init__() 11 | padding = kernel_size // 2 if same_conv else 0 # only support odd kernel 12 | self.conv0 = nn.Conv2d(input_nch, output_nch, kernel_size, padding=padding) 13 | self.conv1 = nn.Conv2d(output_nch, output_nch, kernel_size, padding=padding) 14 | self.act = activation 15 | self.batch_norm = nn.BatchNorm2d(output_nch) if use_bn else None 16 | 17 | def forward(self, x): 18 | x = self.conv0(x) 19 | if self.batch_norm: 20 | x = self.batch_norm(x) 21 | x = self.act(x) 22 | x = self.conv1(x) 23 | if self.batch_norm: 24 | x = self.batch_norm(x) 25 | return self.act(x) 26 | 27 | 28 | class UNet(nn.Module): 29 | def __init__(self, conv_channels, input_nch=3, output_nch=2, use_bn=True): 30 | super(UNet, self).__init__() 31 | self.n_stages = len(conv_channels) 32 | # define convolution blocks 33 | down_convs = [] 34 | up_convs = [] 35 | 36 | self.max_pooling = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 37 | 38 | in_nch = input_nch 39 | for i, out_nch in enumerate(conv_channels): 40 | down_convs.append(UNetConvBlock(in_nch, out_nch, use_bn=use_bn)) 41 | up_conv_in_ch = 2 * out_nch if i < self.n_stages - 1 else out_nch # first up conv with equal channels 42 | up_conv_out_ch = out_nch if i == 0 else in_nch # last up conv with channels equal to labels 43 | up_convs.insert(0, UNetConvBlock(up_conv_in_ch, up_conv_out_ch, use_bn=use_bn)) 44 | in_nch = out_nch 45 | 46 | self.down_convs = nn.ModuleList(down_convs) 47 | self.up_convs = nn.ModuleList(up_convs) 48 | 49 | # define output convolution 50 | self.out_conv = nn.Conv2d(conv_channels[0], output_nch, 1) 51 | 52 | def forward(self, x): 53 | # conv & downsampling 54 | down_sampled_fmaps = [] 55 | for i in range(self.n_stages-1): 56 | x = self.down_convs[i](x) 57 | x = self.max_pooling(x) 58 | down_sampled_fmaps.insert(0, x) 59 | 60 | # center convs 61 | x = self.down_convs[self.n_stages-1](x) 62 | x = self.up_convs[0](x) 63 | 64 | # conv & upsampling 65 | for i, down_sampled_fmap in enumerate(down_sampled_fmaps): 66 | x = torch.cat([x, down_sampled_fmap], 1) 67 | x = self.up_convs[i+1](x) 68 | x = F.upsample(x, scale_factor=2, mode='bilinear') 69 | 70 | return self.out_conv(x) 71 | #x = self.out_conv(x) 72 | #return x if self.out_conv.out_channels == 1 else F.relu(x) 73 | 74 | 75 | class BasicResBlock(nn.Module): 76 | 77 | def __init__(self, input_nch, output_nch, groups=1): 78 | super(BasicResBlock, self).__init__() 79 | self.transform_conv = nn.Conv2d(input_nch, output_nch, 1) 80 | self.bn1 = nn.BatchNorm2d(output_nch) 81 | self.conv1 = nn.Conv2d(output_nch, output_nch, 3, padding=1, groups=groups, bias=False) 82 | self.bn2 = nn.BatchNorm2d(output_nch) 83 | self.conv2 = nn.Conv2d(output_nch, output_nch, 3, padding=1, groups=groups, bias=False) 84 | self.act = nn.LeakyReLU(inplace=True) 85 | 86 | def forward(self, x): 87 | x = self.transform_conv(x) 88 | residual = x 89 | 90 | out = self.bn1(x) 91 | out = self.act(out) 92 | out = self.conv1(out) 93 | 94 | out = self.bn2(out) 95 | out = self.act(out) 96 | out = self.conv2(out) 97 | 98 | out += residual 99 | 100 | return out 101 | 102 | 103 | class TriangleNet(nn.Module): 104 | def __init__(self, conv_channels, input_nch, output_nch, groups=1): 105 | super(TriangleNet, 
self).__init__() 106 | self.input_nch = input_nch 107 | self.output_nch = output_nch 108 | self.pyramid_height = len(conv_channels) 109 | 110 | blocks = [list() for _ in range(self.pyramid_height)] 111 | for i in range(self.pyramid_height): 112 | for j in range(i, self.pyramid_height): 113 | if i == 0 and j == 0: 114 | blocks[i].append(BasicResBlock(input_nch, conv_channels[j], groups=groups)) 115 | else: 116 | blocks[i].append(BasicResBlock(conv_channels[j-1], conv_channels[j], groups=groups)) 117 | 118 | for i in range(self.pyramid_height): 119 | blocks[i] = nn.ModuleList(blocks[i]) 120 | self.blocks = nn.ModuleList(blocks) 121 | 122 | self.down_sample = nn.MaxPool2d(3, 2, 1) 123 | self.up_samples = nn.ModuleList([nn.Upsample(scale_factor=2**i, mode='bilinear') for i in range(1, self.pyramid_height)]) 124 | 125 | self.channel_out_convs = nn.ModuleList([nn.Conv2d(conv_channels[-1], output_nch, 1) for _ in range(self.pyramid_height)]) 126 | self.out_conv = nn.Conv2d(self.pyramid_height * conv_channels[-1], output_nch, 1) 127 | 128 | def forward(self, x): 129 | # forward & expand 130 | x = [self.blocks[0][0](x)] 131 | for i in range(1, self.pyramid_height): 132 | x.append(self.down_sample(x[-1])) 133 | for j in range(i+1): 134 | x[j] = self.blocks[j][i-j](x[j]) 135 | 136 | # upsampling & conv 137 | if self.training: 138 | ms_out = [self.channel_out_convs[i](x[i]) for i in range(self.pyramid_height)] 139 | x = [x[0]] + [self.up_samples[i-1](x[i]) for i in range(1, self.pyramid_height)] 140 | 141 | # final 1x1 conv 142 | out = self.out_conv(torch.cat(x, 1)) 143 | return [out] + ms_out if self.training else out 144 | 145 | 146 | class PSPTriangleNet(nn.Module): 147 | def __init__(self, conv_channels, input_nch, output_nch, groups): 148 | super(PSPTriangleNet, self).__init__() 149 | self.input_nch = input_nch 150 | self.output_nch = output_nch 151 | self.pyramid_height = len(conv_channels) 152 | 153 | blocks = [] 154 | for i in range(self.pyramid_height-1): 155 | if i == 0: 156 | blocks.append(BasicResBlock(input_nch, conv_channels[i], groups=groups)) 157 | else: 158 | blocks.append(BasicResBlock(conv_channels[i-1], conv_channels[i], groups=groups)) 159 | 160 | ms_blocks = [] 161 | for i in range(self.pyramid_height): 162 | ms_blocks.append(BasicResBlock(conv_channels[-2], conv_channels[-1]//self.pyramid_height)) 163 | self.blocks = nn.ModuleList(blocks) 164 | self.ms_blocks = nn.ModuleList(ms_blocks) 165 | 166 | self.down_samples = nn.ModuleList([nn.MaxPool2d(2**i+1, 2**i, 2**(i-1)) for i in range(1, self.pyramid_height)]) 167 | self.up_samples = nn.ModuleList([nn.Upsample(scale_factor=2**i, mode='bilinear') for i in range(1, self.pyramid_height)]) 168 | 169 | self.channel_out_convs = nn.ModuleList([nn.Conv2d(conv_channels[-1]//self.pyramid_height, output_nch, 1) for _ in range(self.pyramid_height)]) 170 | self.out_conv = nn.Conv2d(conv_channels[-1], output_nch, 1) 171 | 172 | def forward(self, x): 173 | # forward & expand 174 | for i in range(self.pyramid_height-1): 175 | x = self.blocks[i](x) 176 | x = [self.ms_blocks[0](x)] + [self.down_samples[i-1](self.ms_blocks[i](x)) for i in range(1, self.pyramid_height)] # pair ms_blocks[i] with down_samples[i-1] so each pyramid branch gets its own block; previously ms_blocks[0] was reused and the last block never used 177 | 178 | # upsampling & conv 179 | if self.training: 180 | ms_out = [self.channel_out_convs[i](x[i]) for i in range(self.pyramid_height)] 181 | x = [x[0]] + [self.up_samples[i-1](x[i]) for i in range(1, self.pyramid_height)] 182 | 183 | # final 1x1 conv 184 | out = self.out_conv(torch.cat(x, 1)) 185 | return [out] + ms_out if self.training else out 186 | 187 |
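188 | 189 | # A minimal smoke-test sketch: build a U-Net with the channel configuration from example.cfg and check the output shape on a dummy batch (assumes a 3-channel input whose H and W are divisible by 2**(len(channels)-1)). 190 | if __name__ == '__main__': 191 | from torch.autograd import Variable 192 | net = UNet([32, 64, 128, 256, 512], input_nch=3, output_nch=2) 193 | net.eval() 194 | dummy = Variable(torch.randn(1, 3, 384, 256)) 195 | out = net(dummy) 196 | print(out.size()) # expected: torch.Size([1, 2, 384, 256])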
-------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import random 3 | import numpy 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torchvision 8 | from torchvision.datasets.folder import * 9 | from torch.optim import SGD, Adadelta, Adam, Adagrad, RMSprop, ASGD 10 | import cv2 11 | 12 | OPTIMIZERS = { 13 | 'sgd': SGD, 14 | 'adadelta': Adadelta, 15 | 'adam': Adam, 16 | 'adagrad': Adagrad, 17 | 'rmsprop': RMSprop, 18 | 'asgd': ASGD 19 | } 20 | 21 | 22 | class SegmentationImageFolder(ImageFolder): 23 | """A simplified segmentation data loader where the images are arranged in this way: :: 24 | 25 | root/images/001.png 26 | root/images/002.png 27 | root/images/003.png 28 | 29 | root/segmentations/001.png 30 | root/segmentations/002.png 31 | root/segmentations/003.png 32 | 33 | images in the two folders must correspond pairwise when sorted by name 34 | 35 | Args: 36 | please refer to 37 | https://github.com/frombeijingwithlove/dlcv_for_beginners/blob/master/chap6/data_augmentation/image_augmentation.py 38 | """ 39 | 40 | def __init__(self, root, 41 | image_folder='images', segmentation_folder='segmentations', 42 | labels=[(0, 0, 0), (255, 255, 255)], 43 | image_size=None, 44 | random_horizontal_flip=False, 45 | random_rotation=0, 46 | random_crop=None, 47 | random_square_crop=False, 48 | loader=default_loader, 49 | label_regr=False, 50 | multi_scale=0): 51 | super(SegmentationImageFolder, self).__init__(root, loader=loader) 52 | pair_len = len(self.imgs) // 2 53 | assert image_folder in self.classes and segmentation_folder in self.classes 54 | if image_folder < segmentation_folder: # ImageFolder sorts classes by name, so the folder-name order decides which half of self.imgs holds the images 55 | self.imgs = [(self.imgs[i][0], self.imgs[i+pair_len][0]) for i in range(pair_len)] 56 | else: 57 | self.imgs = [(self.imgs[i+pair_len][0], self.imgs[i][0]) for i in range(pair_len)] 58 | self.img_folder = image_folder 59 | self.seg_folder = segmentation_folder 60 | self.labels = [numpy.array(x, dtype=numpy.uint8) for x in labels] 61 | self.image_size = image_size 62 | self.flip_lr = random_horizontal_flip 63 | self.random_rotation = random_rotation 64 | self.random_crop = random_crop 65 | self.random_square_crop = random_square_crop 66 | self.label_regr = label_regr 67 | self.multi_scale = multi_scale 68 | 69 | def __getitem__(self, index): 70 | """ 71 | Args: 72 | index (int): Index 73 | 74 | Returns: 75 | tuple: (image, segmentation) where segmentation is the per-pixel label-index map.
76 | """ 77 | imgpath, segpath = self.imgs[index] 78 | img = self.loader(imgpath) 79 | seg = self.loader(segpath) 80 | 81 | # manually transform to incorporate horizontal flip & one-hot coding for segmentation labels 82 | if self.random_rotation: 83 | w, h = img.size 84 | angle = self.random_rotation % 360 85 | img = img.rotate(angle) 86 | seg = seg.rotate(angle) 87 | 88 | angle_crop = angle % 180 89 | if angle_crop > 90: 90 | angle_crop = 180 - angle_crop 91 | theta = angle_crop * numpy.pi / 180.0 92 | hw_ratio = float(h) / float(w) 93 | tan_theta = numpy.tan(theta) 94 | numerator = numpy.cos(theta) + numpy.sin(theta) * tan_theta 95 | r = hw_ratio if h > w else 1 / hw_ratio 96 | denominator = r * tan_theta + 1 97 | crop_mult = numerator / denominator 98 | w_crop = int(round(crop_mult * w)) 99 | h_crop = int(round(crop_mult * h)) 100 | x0 = int((w - w_crop) / 2) 101 | y0 = int((h - h_crop) / 2) 102 | 103 | img = img.crop((x0, y0, x0+w_crop, y0+h_crop)) 104 | seg = seg.crop((x0, y0, x0+w_crop, y0+h_crop)) 105 | 106 | if self.random_crop: 107 | area_ratio, hw_vari = self.random_crop 108 | w, h = img.size 109 | hw_delta = numpy.random.uniform(-hw_vari, hw_vari) 110 | hw_mult = 1 + hw_delta 111 | w_crop = int(round(w * numpy.sqrt(area_ratio * hw_mult))) 112 | if w_crop > w - 2: 113 | w_crop = w - 2 114 | h_crop = int(round(h * numpy.sqrt(area_ratio / hw_mult))) 115 | if h_crop > h - 2: 116 | h_crop = h - 2 117 | x0 = numpy.random.randint(0, w - w_crop - 1) 118 | y0 = numpy.random.randint(0, h - h_crop - 1) 119 | img = img.crop((x0, y0, x0+w_crop, y0+h_crop)) 120 | seg = seg.crop((x0, y0, x0+w_crop, y0+h_crop)) 121 | 122 | if self.random_square_crop: 123 | w, h = img.size 124 | if w > h: 125 | x0 = random.randint(0, w-h-1) 126 | img = img.crop((x0, 0, x0+h, h)) 127 | seg = seg.crop((x0, 0, x0+h, h)) 128 | elif w < h: 129 | y0 = random.randint(0, h-w-1) 130 | img = img.crop((0, y0, w, y0+w)) 131 | seg = seg.crop((0, y0, w, y0+w)) 132 | 133 | if self.image_size: 134 | img = img.resize(self.image_size) 135 | seg = seg.resize(self.image_size, Image.NEAREST) 136 | 137 | # random horizontal flip 138 | if random.random() > 0.5: 139 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 140 | seg = seg.transpose(Image.FLIP_LEFT_RIGHT) 141 | 142 | # one-hot coding for segmentation labels 143 | seg_arr = numpy.array(seg) 144 | seg = numpy.zeros(seg_arr.shape[:2], dtype=numpy.int64) 145 | for i, label_color in enumerate(self.labels): 146 | label_indices = numpy.where(seg_arr == label_color)[:2] 147 | seg[label_indices[0], label_indices[1]] = i 148 | 149 | if self.multi_scale: 150 | h, w = seg.shape 151 | seg = [seg] + [cv2.resize(seg, (w//(2**i), h//(2**i)), interpolation=cv2.INTER_NEAREST).astype(numpy.int64) for i in range(1, self.multi_scale)] 152 | 153 | # to tensor 154 | transform = torchvision.transforms.Compose([ 155 | torchvision.transforms.ToTensor(), 156 | torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 157 | ]) 158 | img = transform(img) 159 | if self.multi_scale: 160 | seg = [torch.Tensor(x) if self.label_regr else torch.LongTensor(x) for x in seg] 161 | else: 162 | seg = torch.Tensor(seg) if self.label_regr else torch.LongTensor(seg) 163 | 164 | return img, seg 165 | 166 | def __len__(self): 167 | return len(self.imgs) 168 | 169 | 170 | class CrossEntropyLoss2D(nn.Module): 171 | def __init__(self, size_average=True): 172 | super(CrossEntropyLoss2D, self).__init__() 173 | self.nll_loss_2d = nn.NLLLoss2d(size_average=size_average) 174 | 175 | def forward(self, outputs, targets): 176 | 
return self.nll_loss_2d(F.log_softmax(outputs), targets) 177 | 178 | 179 | class MSCrossEntropyLoss2D(nn.Module): 180 | def __init__(self, weights, size_average=True): 181 | super(MSCrossEntropyLoss2D, self).__init__() 182 | self.nll_loss_2d = nn.NLLLoss2d(size_average=size_average) 183 | self.weights = weights 184 | 185 | def forward(self, outputs, targets): 186 | loss = self.weights[0] * self.nll_loss_2d(F.log_softmax(outputs[0]), targets[0]) 187 | for i in range(len(self.weights)-1): 188 | loss += self.weights[i+1] * self.nll_loss_2d(F.log_softmax(outputs[i+1]), targets[i]) 189 | return loss 190 | 191 | 192 | def get_datetime_string(): 193 | datetime_now = datetime.now() 194 | return '{}-{}-{}-{}-{}-{}'.format( 195 | datetime_now.year, 196 | datetime_now.month, 197 | datetime_now.day, 198 | datetime_now.hour, 199 | datetime_now.minute, 200 | datetime_now.second 201 | ) 202 | 203 | 204 | # borrowed from 205 | # https://github.com/pytorch/examples/tree/master/imagenet 206 | class AverageMeter(object): 207 | """Computes and stores the average and current value""" 208 | def __init__(self): 209 | self.val = 0 210 | self.avg = 0 211 | self.sum = 0 212 | self.count = 0 213 | 214 | def update(self, val, n=1): 215 | self.val = val 216 | self.sum += val * n 217 | self.count += n 218 | self.avg = self.sum / self.count 219 | 220 | 221 | def get_optimizer(name, model_params, **kwargs): 222 | name = name.lower() 223 | if name == 'sgd': 224 | optimizer = OPTIMIZERS[name]( 225 | model_params, 226 | lr=kwargs['lr'], 227 | momentum=kwargs['momentum'], 228 | nesterov=kwargs['nesterov'] 229 | ) 230 | elif name in ['adadelta', 'adam', 'adagrad', 'asgd']: 231 | optimizer = OPTIMIZERS[name](model_params, lr=kwargs['lr']) 232 | elif name == 'rmsprop': 233 | optimizer = OPTIMIZERS[name]( 234 | model_params, 235 | lr=kwargs['lr'], 236 | momentum=kwargs['momentum'], 237 | ) 238 | else: 239 | raise ValueError('Unsupported optimizer: {}'.format(name)) 240 | 241 | return optimizer 242 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/README.md: -------------------------------------------------------------------------------- 1 | ## Model Fusion in Caffe 2 | Blog: 3 | [Implementing Model Fusion in Caffe](http://www.cnblogs.com/frombeijingwithlove/p/6683476.html) 4 | 5 | ## Train Two Different Models from Scratch 6 | ### step 1 7 | > ./download_mnist.sh 8 | 9 | ### step 2 10 | > python convert_mnist.py 11 | 12 | ### step 3 13 | > python gen_img_list.py 14 | 15 | ### step 4 16 | 17 | Train with lenet_odd_solver.prototxt & lenet_even_solver.prototxt 18 | 19 | ## Fuse Pre-trained Models Directly 20 | Download the pre-trained models from: 21 | https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/mnist_lenet_even_iter_30000.caffemodel 22 | https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/mnist_lenet_odd_iter_30000.caffemodel 23 | 24 | ## Generate the Fused prototxt 25 | 26 | > python rename_n_freeze_layers.py input_model output_model prefix 27 | 28 | Copy the layer definitions of each renamed model, from just after the data layer down to the feature layer to be fused (e.g. ip1), into a single prototxt, then add a data layer at the top. The feature layers can be fused by concatenating them with a Concat layer, followed by fully connected layers or other operations; see lenet_fusion_train_val.prototxt for a complete example. 29 | 30 | ## Generate the Fused Weights 31 | > python fuse_model.py 32 | 33 | ## Fine-tune the Fused Model 34 | 35 | Just train it directly. -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/convert_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle, gzip 3 | from matplotlib import pyplot 4 | 5 | # Load the dataset 6 | print('Loading data from mnist.pkl.gz ...') 7 |
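# note: mnist.pkl.gz was pickled under Python 2; if running this script under Python 3, the load below would need pickle.load(f, encoding='latin1')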
with gzip.open('mnist.pkl.gz', 'rb') as f: 8 | train_set, valid_set, test_set = pickle.load(f) 9 | 10 | imgs_dir = 'mnist' 11 | os.system('mkdir -p {}'.format(imgs_dir)) 12 | datasets = {'train': train_set, 'val': valid_set, 'test': test_set} 13 | for dataname, dataset in datasets.items(): 14 | print('Converting {} dataset ...'.format(dataname)) 15 | data_dir = os.sep.join([imgs_dir, dataname]) 16 | os.system('mkdir -p {}'.format(data_dir)) 17 | for i, (img, label) in enumerate(zip(*dataset)): 18 | filename = '{:0>6d}_{}.jpg'.format(i, label) 19 | filepath = os.sep.join([data_dir, filename]) 20 | img = img.reshape((28, 28)) 21 | pyplot.imsave(filepath, img, cmap='gray') 22 | if (i+1) % 10000 == 0: 23 | print('{} images converted!'.format(i+1)) 24 | 25 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/download_mnist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # wget http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz 3 | wget http://deeplearning.net/data/mnist/mnist.pkl.gz 4 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/fuse_model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/path/to/caffe/python') 3 | import caffe 4 | 5 | fusion_net = caffe.Net('lenet_fusion_train_val.prototxt', caffe.TEST) 6 | 7 | model_list = [ 8 | ('even', 'lenet_even_train_val.prototxt', 'mnist_lenet_even_iter_30000.caffemodel'), 9 | ('odd', 'lenet_odd_train_val.prototxt', 'mnist_lenet_odd_iter_30000.caffemodel') 10 | ] 11 | 12 | for prefix, model_def, model_weight in model_list: 13 | net = caffe.Net(model_def, model_weight, caffe.TEST) 14 | 15 | for layer_name, param in net.params.items(): # items() keeps this working under both Python 2 and 3 16 | n_params = len(param) 17 | try: 18 | for i in range(n_params): 19 | fusion_net.params['{}/{}'.format(prefix, layer_name)][i].data[...] = param[i].data[...]
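# each layer in the fusion net carries a model prefix (e.g. "even/conv1"), so the source weights are copied into the matching prefixed layer; source layers with no counterpart in the fusion net (e.g. the old ip2) raise a KeyError and are skipped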
20 | except Exception as e: 21 | print(e) 22 | 23 | fusion_net.save('init_fusion.caffemodel') 24 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/gen_img_list.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | mnist_path = 'mnist' 5 | data_sets = ['train', 'val'] 6 | 7 | for data_set in data_sets: 8 | odd_list = '{}_odd.txt'.format(data_set) 9 | even_list = '{}_even.txt'.format(data_set) 10 | all_list = '{}_all.txt'.format(data_set) 11 | root = os.sep.join([mnist_path, data_set]) 12 | filenames = os.listdir(root) 13 | with open(odd_list, 'w') as f_odd, open(even_list, 'w') as f_even, open(all_list, 'w') as f_all: 14 | for filename in filenames: 15 | filepath = os.sep.join([root, filename]) 16 | label = int(filename[:filename.rfind('.')].split('_')[1]) 17 | line = '{} {}\n'.format(filepath, label) 18 | f_all.write(line) 19 | 20 | line = '{} {}\n'.format(filepath, int(label/2)) 21 | if label % 2: 22 | f_odd.write(line) 23 | else: 24 | f_even.write(line) 25 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_even_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "lenet_even_train_val.prototxt" 2 | test_iter: 247 3 | test_initialization: false 4 | test_interval: 1000 5 | base_lr: 0.01 6 | momentum: 0.9 7 | weight_decay: 0.0005 8 | lr_policy: "step" 9 | gamma: 0.707 10 | stepsize: 1000 11 | display: 200 12 | max_iter: 30000 13 | snapshot: 30000 14 | snapshot_prefix: "mnist_lenet_even" 15 | solver_mode: GPU 16 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_even_train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "ImageData" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | image_data_param { 15 | source: "train_even.txt" 16 | is_color: false 17 | batch_size: 25 18 | } 19 | } 20 | layer { 21 | name: "mnist" 22 | type: "ImageData" 23 | top: "data" 24 | top: "label" 25 | include { 26 | phase: TEST 27 | } 28 | transform_param { 29 | mean_value: 128 30 | scale: 0.00390625 31 | } 32 | image_data_param { 33 | source: "val_even.txt" 34 | is_color: false 35 | batch_size: 20 36 | } 37 | } 38 | layer { 39 | name: "conv1" 40 | type: "Convolution" 41 | bottom: "data" 42 | top: "conv1" 43 | param { 44 | lr_mult: 1 45 | } 46 | param { 47 | lr_mult: 2 48 | } 49 | convolution_param { 50 | num_output: 20 51 | kernel_size: 5 52 | stride: 1 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 2 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "conv2" 74 | type: "Convolution" 75 | bottom: "pool1" 76 | top: "conv2" 77 | param { 78 | lr_mult: 1 79 | } 80 | param { 81 | lr_mult: 2 82 | } 83 | convolution_param { 84 | num_output: 50 85 | kernel_size: 5 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "conv2" 99 | 
top: "pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "ip1" 108 | type: "InnerProduct" 109 | bottom: "pool2" 110 | top: "ip1" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | inner_product_param { 118 | num_output: 500 119 | weight_filler { 120 | type: "xavier" 121 | } 122 | bias_filler { 123 | type: "constant" 124 | } 125 | } 126 | } 127 | layer { 128 | name: "relu1" 129 | type: "ReLU" 130 | bottom: "ip1" 131 | top: "ip1" 132 | } 133 | layer { 134 | name: "ip2" 135 | type: "InnerProduct" 136 | bottom: "ip1" 137 | top: "ip2" 138 | param { 139 | lr_mult: 1 140 | } 141 | param { 142 | lr_mult: 2 143 | } 144 | inner_product_param { 145 | num_output: 5 146 | weight_filler { 147 | type: "xavier" 148 | } 149 | bias_filler { 150 | type: "constant" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "accuracy" 156 | type: "Accuracy" 157 | bottom: "ip2" 158 | bottom: "label" 159 | top: "accuracy" 160 | include { 161 | phase: TEST 162 | } 163 | } 164 | layer { 165 | name: "loss" 166 | type: "SoftmaxWithLoss" 167 | bottom: "ip2" 168 | bottom: "label" 169 | top: "loss" 170 | } 171 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_fusion_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "lenet_fusion_train_val.prototxt" 2 | test_iter: 500 3 | test_initialization: false 4 | test_interval: 1000 5 | base_lr: 0.01 6 | momentum: 0.9 7 | weight_decay: 0.0005 8 | lr_policy: "step" 9 | gamma: 0.707 10 | stepsize: 1000 11 | display: 200 12 | max_iter: 30000 13 | snapshot: 30000 14 | snapshot_prefix: "mnist_lenet_fused" 15 | solver_mode: GPU 16 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_fusion_train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "ImageData" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | image_data_param { 15 | source: "train_all.txt" 16 | is_color: false 17 | batch_size: 50 18 | } 19 | } 20 | layer { 21 | name: "mnist" 22 | type: "ImageData" 23 | top: "data" 24 | top: "label" 25 | include { 26 | phase: TEST 27 | } 28 | transform_param { 29 | mean_value: 128 30 | scale: 0.00390625 31 | } 32 | image_data_param { 33 | source: "val_all.txt" 34 | is_color: false 35 | batch_size: 20 36 | } 37 | } 38 | layer { 39 | name: "odd/conv1" 40 | type: "Convolution" 41 | bottom: "data" 42 | top: "odd/conv1" 43 | param { 44 | lr_mult: 0 45 | } 46 | param { 47 | lr_mult: 0 48 | } 49 | convolution_param { 50 | num_output: 20 51 | kernel_size: 5 52 | stride: 1 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "odd/pool1" 63 | type: "Pooling" 64 | bottom: "odd/conv1" 65 | top: "odd/pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 2 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "odd/conv2" 74 | type: "Convolution" 75 | bottom: "odd/pool1" 76 | top: "odd/conv2" 77 | param { 78 | lr_mult: 0 79 | } 80 | param { 81 | lr_mult: 0 82 | } 83 | convolution_param { 84 | num_output: 50 85 | kernel_size: 5 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | 
type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "odd/pool2" 97 | type: "Pooling" 98 | bottom: "odd/conv2" 99 | top: "odd/pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "odd/ip1" 108 | type: "InnerProduct" 109 | bottom: "odd/pool2" 110 | top: "odd/ip1" 111 | param { 112 | lr_mult: 0 113 | } 114 | param { 115 | lr_mult: 0 116 | } 117 | inner_product_param { 118 | num_output: 500 119 | weight_filler { 120 | type: "xavier" 121 | } 122 | bias_filler { 123 | type: "constant" 124 | } 125 | } 126 | } 127 | layer { 128 | name: "odd/relu1" 129 | type: "ReLU" 130 | bottom: "odd/ip1" 131 | top: "odd/ip1" 132 | } 133 | layer { 134 | name: "even/conv1" 135 | type: "Convolution" 136 | bottom: "data" 137 | top: "even/conv1" 138 | param { 139 | lr_mult: 0 140 | } 141 | param { 142 | lr_mult: 0 143 | } 144 | convolution_param { 145 | num_output: 20 146 | kernel_size: 5 147 | stride: 1 148 | weight_filler { 149 | type: "xavier" 150 | } 151 | bias_filler { 152 | type: "constant" 153 | } 154 | } 155 | } 156 | layer { 157 | name: "even/pool1" 158 | type: "Pooling" 159 | bottom: "even/conv1" 160 | top: "even/pool1" 161 | pooling_param { 162 | pool: MAX 163 | kernel_size: 2 164 | stride: 2 165 | } 166 | } 167 | layer { 168 | name: "even/conv2" 169 | type: "Convolution" 170 | bottom: "even/pool1" 171 | top: "even/conv2" 172 | param { 173 | lr_mult: 0 174 | } 175 | param { 176 | lr_mult: 0 177 | } 178 | convolution_param { 179 | num_output: 50 180 | kernel_size: 5 181 | stride: 1 182 | weight_filler { 183 | type: "xavier" 184 | } 185 | bias_filler { 186 | type: "constant" 187 | } 188 | } 189 | } 190 | layer { 191 | name: "even/pool2" 192 | type: "Pooling" 193 | bottom: "even/conv2" 194 | top: "even/pool2" 195 | pooling_param { 196 | pool: MAX 197 | kernel_size: 2 198 | stride: 2 199 | } 200 | } 201 | layer { 202 | name: "even/ip1" 203 | type: "InnerProduct" 204 | bottom: "even/pool2" 205 | top: "even/ip1" 206 | param { 207 | lr_mult: 0 208 | } 209 | param { 210 | lr_mult: 0 211 | } 212 | inner_product_param { 213 | num_output: 500 214 | weight_filler { 215 | type: "xavier" 216 | } 217 | bias_filler { 218 | type: "constant" 219 | } 220 | } 221 | } 222 | layer { 223 | name: "even/relu1" 224 | type: "ReLU" 225 | bottom: "even/ip1" 226 | top: "even/ip1" 227 | } 228 | layer { 229 | name: "concat" 230 | bottom: "odd/ip1" 231 | bottom: "even/ip1" 232 | top: "ip1_fused" 233 | type: "Concat" 234 | concat_param { 235 | axis: 1 236 | } 237 | } 238 | layer { 239 | name: "ip2" 240 | type: "InnerProduct" 241 | bottom: "ip1_fused" 242 | top: "ip2" 243 | param { 244 | lr_mult: 1 245 | } 246 | param { 247 | lr_mult: 2 248 | } 249 | inner_product_param { 250 | num_output: 10 251 | weight_filler { 252 | type: "xavier" 253 | } 254 | bias_filler { 255 | type: "constant" 256 | } 257 | } 258 | } 259 | layer { 260 | name: "accuracy" 261 | type: "Accuracy" 262 | bottom: "ip2" 263 | bottom: "label" 264 | top: "accuracy" 265 | include { 266 | phase: TEST 267 | } 268 | } 269 | layer { 270 | name: "loss" 271 | type: "SoftmaxWithLoss" 272 | bottom: "ip2" 273 | bottom: "label" 274 | top: "loss" 275 | } 276 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_odd_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "lenet_odd_train_val.prototxt" 2 | test_iter: 253 3 | test_initialization: false 4 | test_interval: 1000 5 | 
base_lr: 0.01 6 | momentum: 0.9 7 | weight_decay: 0.0005 8 | lr_policy: "step" 9 | gamma: 0.707 10 | stepsize: 1000 11 | display: 200 12 | max_iter: 30000 13 | snapshot: 30000 14 | snapshot_prefix: "mnist_lenet_odd" 15 | solver_mode: GPU 16 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_odd_train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "ImageData" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | image_data_param { 15 | source: "train_odd.txt" 16 | is_color: false 17 | batch_size: 25 18 | } 19 | } 20 | layer { 21 | name: "mnist" 22 | type: "ImageData" 23 | top: "data" 24 | top: "label" 25 | include { 26 | phase: TEST 27 | } 28 | transform_param { 29 | mean_value: 128 30 | scale: 0.00390625 31 | } 32 | image_data_param { 33 | source: "val_odd.txt" 34 | is_color: false 35 | batch_size: 20 36 | } 37 | } 38 | layer { 39 | name: "conv1" 40 | type: "Convolution" 41 | bottom: "data" 42 | top: "conv1" 43 | param { 44 | lr_mult: 1 45 | } 46 | param { 47 | lr_mult: 2 48 | } 49 | convolution_param { 50 | num_output: 20 51 | kernel_size: 5 52 | stride: 1 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 2 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "conv2" 74 | type: "Convolution" 75 | bottom: "pool1" 76 | top: "conv2" 77 | param { 78 | lr_mult: 1 79 | } 80 | param { 81 | lr_mult: 2 82 | } 83 | convolution_param { 84 | num_output: 50 85 | kernel_size: 5 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "conv2" 99 | top: "pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "ip1" 108 | type: "InnerProduct" 109 | bottom: "pool2" 110 | top: "ip1" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | inner_product_param { 118 | num_output: 500 119 | weight_filler { 120 | type: "xavier" 121 | } 122 | bias_filler { 123 | type: "constant" 124 | } 125 | } 126 | } 127 | layer { 128 | name: "relu1" 129 | type: "ReLU" 130 | bottom: "ip1" 131 | top: "ip1" 132 | } 133 | layer { 134 | name: "ip2" 135 | type: "InnerProduct" 136 | bottom: "ip1" 137 | top: "ip2" 138 | param { 139 | lr_mult: 1 140 | } 141 | param { 142 | lr_mult: 2 143 | } 144 | inner_product_param { 145 | num_output: 5 146 | weight_filler { 147 | type: "xavier" 148 | } 149 | bias_filler { 150 | type: "constant" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "accuracy" 156 | type: "Accuracy" 157 | bottom: "ip2" 158 | bottom: "label" 159 | top: "accuracy" 160 | include { 161 | phase: TEST 162 | } 163 | } 164 | layer { 165 | name: "loss" 166 | type: "SoftmaxWithLoss" 167 | bottom: "ip2" 168 | bottom: "label" 169 | top: "loss" 170 | } 171 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/rename_n_freeze_layers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | 4 | 
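# regexes matching layer names (e.g. name: "conv1") and lr_mult statements in a Caffe prototxt; every name gets a prefix and every lr_mult is zeroed to freeze the copied layers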
layer_name_regex = re.compile(r'name:\s*"(.*?)"') 5 | lr_mult_regex = re.compile(r'lr_mult:\s*\d+\.*\d*') 6 | 7 | input_filepath = sys.argv[1] 8 | output_filepath = sys.argv[2] 9 | prefix = sys.argv[3] 10 | 11 | with open(input_filepath, 'r') as fr, open(output_filepath, 'w') as fw: 12 | prototxt = fr.read() 13 | layer_names = set(layer_name_regex.findall(prototxt)) 14 | for layer_name in layer_names: 15 | prototxt = prototxt.replace(layer_name, '{}/{}'.format(prefix, layer_name)) 16 | 17 | lr_mult_statements = set(lr_mult_regex.findall(prototxt)) 18 | for lr_mult_statement in lr_mult_statements: 19 | prototxt = prototxt.replace(lr_mult_statement, 'lr_mult: 0') 20 | 21 | fw.write(prototxt) 22 | -------------------------------------------------------------------------------- /reference.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/reference.pdf --------------------------------------------------------------------------------