├── LICENSE ├── README.md ├── _config.yml ├── chap10 ├── README.md ├── data │ ├── collect_data.py │ ├── downscale.py │ ├── food_augmentation.py │ ├── gen_label_list.py │ ├── image_urls │ │ ├── 000_urls.txt │ │ ├── 001_urls.txt │ │ ├── 002_urls.txt │ │ ├── 003_urls.txt │ │ ├── 004_urls.txt │ │ ├── 005_urls.txt │ │ ├── 006_urls.txt │ │ └── fig-10.7.txt │ ├── keywords.txt │ ├── link_data_augmentation.sh │ ├── remove_dups_from_list.py │ ├── remove_invalid_images.py │ └── sample_val.py ├── download_resnet10_cvgj_weights.sh ├── food_resnet_10_cvgj_deploy.prototxt ├── food_resnet_10_cvgj_finetune_val.prototxt ├── kaoya_shuizhurou_roc_auc.py ├── make_confusion_matrix.py ├── recognize_food.py ├── solver.prototxt ├── sort_kaoya_by_pred_prob.py ├── val_results.txt └── visualize_activation.py ├── chap11 ├── README.md └── prepare_voc_data.sh ├── chap12 ├── README.md ├── gen_pairwise_imglist.py ├── mnist_siamese.prototxt ├── mnist_siamese_solver.prototxt ├── mnist_siamese_train_val.prototxt └── visualize_result.py ├── chap5 ├── README.md ├── bar_n_pie_chart.py ├── fit_data.py ├── scatter_3d.py ├── surface_3d.py └── three_doors.py ├── chap6 ├── README.md ├── bbox_labeling │ ├── bbox_labeling.py │ ├── detection_anno_bbox2voc.py │ └── samples.labels └── data_augmentation │ ├── image_augmentation.py │ ├── run_augmentation.py │ └── run_augmentation_pool_map.py ├── chap7 ├── README.md ├── caffe │ ├── gen_hdf5.py │ ├── simple_mlp_test.py │ ├── simple_mlp_train.py │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt ├── gen_data.py └── mxnet │ └── simple_mlp.py ├── chap8 ├── README.md ├── caffe │ ├── lenet.prototxt │ ├── lenet_solver.prototxt │ ├── lenet_solver_aug.prototxt │ ├── lenet_test.prototxt │ ├── lenet_train_val.prototxt │ ├── lenet_train_val_aug.prototxt │ └── recognize_digit.py ├── data │ ├── convert_mnist.py │ ├── download_mnist.sh │ ├── gen_caffe_imglist.py │ └── gen_mxnet_imglist.py └── mxnet │ ├── benchmark_model.py │ ├── recognize_digit.py │ ├── score_model.py │ └── train_lenet5.py ├── chap9 ├── README.md ├── deploy.prototxt ├── gen_hdf5.py ├── gen_label.py ├── make_noises.py ├── predict.py ├── solver.prototxt ├── train_val.prototxt └── visualize_conv1_kernels.py ├── errata.pdf ├── random_bonus ├── README.md ├── adversarial_example_caffe │ ├── README.md │ ├── adversarial_example_demo.py │ ├── download-squeezenet-v1.0-weights.sh │ ├── little_white_dog.jpg │ ├── squeezenet-v1.0-deploy-with-force-backward.prototxt │ └── synset_words.txt ├── gan_n_cgan_2d_example │ ├── LICENSE │ ├── README.md │ ├── argparser.py │ ├── cgan_demo.py │ ├── gan_demo.py │ ├── inputs │ │ ├── U.jpg │ │ ├── Z.jpg │ │ ├── batman.jpg │ │ ├── binary.jpg │ │ ├── binary │ │ │ ├── 0.jpg │ │ │ └── 1.jpg │ │ ├── circle.jpg │ │ ├── dumbbell.jpg │ │ ├── penta.jpg │ │ ├── penta │ │ │ ├── 0.jpg │ │ │ ├── 1.jpg │ │ │ ├── 2.jpg │ │ │ ├── 3.jpg │ │ │ └── 4.jpg │ │ ├── random.jpg │ │ ├── triangle.jpg │ │ ├── vortex.jpg │ │ ├── vortex │ │ │ ├── 0.jpg │ │ │ ├── 1.jpg │ │ │ └── 2.jpg │ │ └── zig.jpg │ ├── networks.py │ ├── sampler.py │ └── visualizer.py ├── generate_mosaic_for_porno_images │ ├── README.md │ ├── clone_open_nsfw.sh │ ├── crop_n_resize.py │ ├── deploy_global_pooling.prototxt │ └── gen_mosaic.py ├── great-circle-interp │ ├── README.md │ ├── distance-experiment.py │ ├── latent-walk-great-circle.py │ ├── latent-walk-slerp-vs-lerp.py │ └── networks.py ├── image-segmentation(updating) │ ├── README.md │ ├── argparser.py │ ├── example.cfg │ ├── loss_visualizer.py │ ├── main.py │ ├── networks.py │ └── utils.py └── 
multiple_models_fusion_caffe │ ├── README.md │ ├── convert_mnist.py │ ├── download_mnist.sh │ ├── fuse_model.py │ ├── gen_img_list.py │ ├── lenet_even_solver.prototxt │ ├── lenet_even_train_val.prototxt │ ├── lenet_fusion_solver.prototxt │ ├── lenet_fusion_train_val.prototxt │ ├── lenet_odd_solver.prototxt │ ├── lenet_odd_train_val.prototxt │ └── rename_n_freeze_layers.py └── reference.pdf /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, frombeijingwithlove 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Companion code for *Deep Learning and Computer Vision* (《深度学习与计算机视觉》)
2 | ===
3 | Thanks to everyone for the questions raised in the issues. This repo has gone without updates for a long time; I had planned to concentrate on a round of updates before Spring Festival, but after going through everything I hardly knew where to start, and since the publisher's editors and I have agreed that there will most likely be no second edition or reprint, I decided to let it go...
4 | 
5 | One more error I found myself is in the part about eigenvalues: SVD is not for non-positive-definite matrices, it is for non-symmetric matrices. See the updated Zhihu answer for details:
6 | https://www.zhihu.com/question/20507061/answer/120540926
7 | The remaining corrections are either already in errata.pdf, or covered by the issue discussions, which are essentially correct.
8 | 
9 | This repo is not expected to be updated any further (frankly, the second half of the book is already outdated). If you really run into problems, send me a private message on GitHub: yeyun11
10 | 
11 | Happy Spring Festival to all. Lunar New Year's Eve, 2020
12 | 
13 | _
14 | ===
15 | 
16 | ![cover](https://raw.githubusercontent.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/master/fm.jpg)
17 | 
18 | Originally titled *Deep Learning and Computer Vision: An Introduction by Examples* (《深度学习与计算机视觉:实例入门》). Please note: **this book is positioned as an introductory text**.
19 | 
20 | The code is [here](https://github.com/frombeijingwithlove/dlcv_for_beginners). Electronic versions of all color figures can be downloaded [here](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/tree/master/figs_n_plots); for the color figures of Chapters 5 and 6, see the online versions: [Chapter 5, part 1](https://zhuanlan.zhihu.com/p/24162430), [Chapter 5, part 2](https://zhuanlan.zhihu.com/p/24309547), [Chapter 6](https://zhuanlan.zhihu.com/p/24425116).
21 | 
22 | For reasons I cannot understand, the publisher insisted that the English in the book be translated. As a result: 1) some of the English was translated into Chinese to varying degrees, and 2) the reference list, most of which is English literature, could not be included in the book. The reference list is [here](https://github.com/frombeijingwithlove/dlcv_for_beginners/blob/master/reference.pdf).
23 | 
24 | Please report content errors [here](https://github.com/frombeijingwithlove/dlcv_for_beginners/issues). The errata are [here](https://github.com/frombeijingwithlove/dlcv_for_beginners/blob/master/errata.pdf).
25 | 
26 | Purchase links: [JD](https://item.jd.com/12152559.html), [Amazon](https://www.amazon.cn/gp/product/B074JWSF99), [Dangdang](http://product.dangdang.com/25138676.html)
27 | 
28 | ## Quick code index
29 | [Chapter 5: numpy and matplotlib visualization examples](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap5)
30 | [Chapter 6: a small bounding-box annotation tool and a local data-augmentation tool](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap6)
31 | [Chapter 7: classification on the 2D plane, with both Caffe and MXNet](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap7)
32 | [Chapter 8: MNIST classification, with both Caffe and MXNet](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap8)
33 | [Chapter 9: regressing the degree of image disorder with Caffe, plus visualization of convolution kernels](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap9)
34 | [Chapter 10: transfer learning a food classifier from an ImageNet pretrained model, with confusion matrix, ROC curves, and class activation visualization, based on Caffe](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap10)
35 | [Chapter 12: training a Siamese network on MNIST, with t-SNE visualization, based on Caffe](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/chap12)
36 | [Odds and ends not covered in the book: crafting adversarial examples (Caffe), 2D GAN and CGAN with training-process visualization (PyTorch), automatic mosaics for porn images (Caffe), model fusion (Caffe), and image segmentation (PyTorch)](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/random_bonus)
37 | [Model pruning (PyTorch)](https://github.com/yeyun11/pytorch-network-slimming)
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-minimal
--------------------------------------------------------------------------------
/chap10/README.md:
--------------------------------------------------------------------------------
1 | ## Chapter 10: Transfer Learning and Model Fine-tuning
2 | 
3 | Download images for each keyword from a search engine, train a model by fine-tuning, and finally evaluate, analyze, and visualize it.
4 | 
5 | See Chapter 10 of the book for detailed usage.
6 | 
7 | The pretrained model for the example in the book can be downloaded from:
8 | https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/food_resnet-10_iter_10000.caffemodel
9 | or
10 | http://pan.baidu.com/s/1jHRLsLw
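11 | 
12 | A rough sketch of the usual order of the data-preparation steps, inferred from the scripts under data/ rather than taken from the book (the short-edge size 256 below is illustrative):
13 | 
14 | > cd data
15 | > python collect_data.py
16 | 
17 | crawls images for each keyword in keywords.txt into 000/ ... 006/
18 | 
19 | > python remove_invalid_images.py .
20 | > python downscale.py . 256
21 | 
22 | drop files that OpenCV cannot decode, then shrink any image whose short edge exceeds 256 pixels. After gathering the class folders into train/:
23 | 
24 | > python sample_val.py
25 | > sh link_data_augmentation.sh
26 | > python food_augmentation.py
27 | > python gen_label_list.py train
28 | > python gen_label_list.py val
29 | 
30 | moves 300 images per class into val/, links the chap6 augmentation tools, tops every class in train/ up to 3000 samples, and writes the train.txt/val.txt label lists. remove_dups_from_list.py additionally expects a text file listing groups of duplicate images (one group per line, first entry kept), produced by an external duplicate finder.
--------------------------------------------------------------------------------
/chap10/data/collect_data.py: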
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import urllib
4 | from multiprocessing import Process
5 | 
6 | SUPPORTED_FORMATS = ['jpg', 'png', 'jpeg']
7 | URL_TEMPLATE = r'http://image.b***u.com/search/flip?tn=b***uimage&ie=utf-8&word={keyword}&pn={index}'
8 | 
9 | def download_images_from_b***u(dir_name, keyword, start_index, end_index):
10 |     index = start_index
11 |     while index < end_index:
12 |         url = URL_TEMPLATE.format(keyword=keyword, index=index)
13 |         try:
14 |             html_text = urllib.urlopen(url).read().decode('utf-8', 'ignore')
15 |             image_urls = re.findall(r'"objURL":"(.*?)"', html_text)
16 |             if not image_urls:
17 |                 print('Cannot retrieve any more image urls from {}\nStopping ...'.format(url))
18 |                 break
19 |         except IOError as e:
20 |             print(e)
21 |             print('Cannot open {}. \nStopping ...'.format(url))
22 |             break
23 | 
24 |         downloaded_urls = []
25 |         for url in image_urls:
26 |             filename = url.split('/')[-1]
27 |             ext = filename[filename.rfind('.')+1:]
28 |             if ext.lower() not in SUPPORTED_FORMATS:
29 |                 index += 1
30 |                 continue
31 |             filename = '{}/{:0>6d}.{}'.format(dir_name, index, ext)
32 |             cmd = 'wget "{}" -t 3 -T 5 -O {}'.format(url, filename)
33 |             os.system(cmd)
34 | 
35 |             if os.path.exists(filename) and os.path.getsize(filename) > 1024:
36 |                 index_url = '{:0>6d},{}'.format(index, url)
37 |                 downloaded_urls.append(index_url)
38 |             else:
39 |                 os.system('rm {}'.format(filename))
40 | 
41 |             index += 1
42 |             if index >= end_index:
43 |                 break
44 | 
45 |         with open('{}_urls.txt'.format(dir_name), 'a') as furls:
46 |             urls_text = '{}\n'.format('\n'.join(downloaded_urls))
47 |             if len(urls_text) > 11:
48 |                 furls.write(urls_text)
49 | 
50 | def download_images(keywords, num_per_kw, procs_per_kw):
51 |     args_list = []
52 |     for class_id, keyword in enumerate(keywords):
53 |         dir_name = '{:0>3d}'.format(class_id)
54 |         os.system('mkdir -p {}'.format(dir_name))
55 |         # divide before converting: float(num_per_kw/procs_per_kw) truncates first under Python 2
56 |         num_per_proc = int(round(float(num_per_kw) / procs_per_kw))
57 |         for i in range(procs_per_kw):
58 |             start_index = i * num_per_proc
59 |             end_index = start_index + num_per_proc - 1
60 |             args_list.append((dir_name, keyword, start_index, end_index))
61 | 
62 |     processes = [Process(target=download_images_from_b***u, args=x) for x in args_list]
63 | 
64 |     print('Starting to download images with {} processes ...'.format(len(processes)))
65 | 
66 |     for p in processes:
67 |         p.start()
68 | 
69 |     for p in processes:
70 |         p.join()
71 | 
72 |     print('Done!')
73 | 
74 | if __name__ == "__main__":
75 |     with open('keywords.txt', 'rb') as f:
76 |         foods = f.read().split()
77 |     download_images(foods, 2000, 3)
78 | 
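79 | # NOTE (editor's sketch, not from the book): this script targets Python 2,
80 | # where urllib.urlopen exists. A rough Python 3 equivalent of the fetch above:
81 | #   from urllib.request import urlopen
82 | #   html_text = urlopen(url).read().decode('utf-8', 'ignore')
--------------------------------------------------------------------------------
/chap10/data/downscale.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import sys
4 | 
5 | input_path = sys.argv[1].rstrip(os.sep)
6 | target_short_edge = int(sys.argv[2])
7 | 
8 | for root, dirs, files in os.walk(input_path):
9 |     print('scanning {} ...'.format(root))
10 |     for filename in files:
11 |         filepath = os.sep.join([root, filename])
12 | 
13 |         img = cv2.imread(filepath)
14 |         h, w = img.shape[:2]
15 |         short_edge = min(w, h)
16 | 
17 |         if short_edge > target_short_edge:
18 |             scale = float(target_short_edge) / float(short_edge)
19 |             new_w = int(round(w*scale))
20 |             new_h = int(round(h*scale))
21 |             print('Down sampling {} from {}x{} to {}x{} ...'.format(
22 |                 filepath, w, h, new_w, new_h
23 |             ))
24 |             img = cv2.resize(img, (new_w, new_h))
25 |             cv2.imwrite(filepath,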
img) 26 | 27 | print('Done!') 28 | -------------------------------------------------------------------------------- /chap10/data/food_augmentation.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | n_total = 3000 4 | 5 | class_dirs = os.listdir('train') 6 | 7 | for class_dir in class_dirs: 8 | src_path = 'train/{}'.format(class_dir) 9 | n_samples = len(os.listdir(src_path)) 10 | n_aug = n_total - n_samples 11 | cmd = 'python run_augmentation.py {} temp {}'.format(src_path, n_aug) 12 | os.system(cmd) 13 | cmd = 'mv temp/* {}'.format(src_path) 14 | os.system(cmd) 15 | 16 | os.system('rm -r temp') 17 | -------------------------------------------------------------------------------- /chap10/data/gen_label_list.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | dataset = sys.argv[1].rstrip(os.sep) 5 | 6 | class_dirs = os.listdir(dataset) 7 | 8 | with open('{}.txt'.format(dataset), 'w') as f: 9 | for class_dir in class_dirs: 10 | class_path = os.sep.join([dataset, class_dir]) 11 | label = int(class_dir) 12 | lines = ['{}/{} {}'.format(class_path, x, label) for x in os.listdir(class_path)] 13 | f.write('\n'.join(lines) + '\n') 14 | -------------------------------------------------------------------------------- /chap10/data/image_urls/fig-10.7.txt: -------------------------------------------------------------------------------- 1 | http://www.seelvyou.com/uploadfile/jingqu/beijing/beijing33.jpg 2 | http://pic51.huitu.com/res/20160201/859521_20160201110841329500_1.jpg 3 | http://www.wazsjg.com/image-pic/bd729074.jpg.jpg 4 | -------------------------------------------------------------------------------- /chap10/data/keywords.txt: -------------------------------------------------------------------------------- 1 | 烤鸭 2 | 羊肉串 3 | 水煮肉 4 | 鸡汤 5 | 麻小 6 | 面条 7 | 包子 8 | -------------------------------------------------------------------------------- /chap10/data/link_data_augmentation.sh: -------------------------------------------------------------------------------- 1 | ln -s ../../chap6/data_augmentation/run_augmentation.py run_augmentation.py 2 | ln -s ../../chap6/data_augmentation/image_augmentation.py image_augmentation.py 3 | 4 | -------------------------------------------------------------------------------- /chap10/data/remove_dups_from_list.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | dup_list = sys.argv[1] 5 | 6 | with open(dup_list, 'r') as f: 7 | lines = f.readlines() 8 | for line in lines: 9 | dups = line.split() 10 | print('Removing duplicates of {}'.format(dups[0])) 11 | for dup in dups[1:]: 12 | cmd = 'rm {}'.format(dup) 13 | os.system(cmd) 14 | -------------------------------------------------------------------------------- /chap10/data/remove_invalid_images.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | from collect_data import SUPPORTED_FORMATS 5 | 6 | input_path = sys.argv[1] 7 | 8 | for root, dirs, files in os.walk(input_path): 9 | for filename in files: 10 | ext = filename[filename.rfind('.')+1:].lower() 11 | if ext not in SUPPORTED_FORMATS: 12 | continue 13 | filepath = os.sep.join([root, filename]) 14 | if cv2.imread(filepath) is None: 15 | os.system('rm {}'.format(filepath)) 16 | print('{} is not a valid image file. 
Deleted!'.format(filepath))
17 | 
--------------------------------------------------------------------------------
/chap10/data/sample_val.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | 
4 | N = 300
5 | 
6 | os.system('mkdir -p val')
7 | class_dirs = os.listdir('train')
8 | 
9 | for class_dir in class_dirs:
10 |     os.system('mkdir -p val/{}'.format(class_dir))
11 |     root = 'train/{}'.format(class_dir)
12 |     print('Sampling validation set with {} images from {} ...'.format(N, root))
13 |     filenames = os.listdir(root)
14 |     random.shuffle(filenames)
15 |     val_filenames = filenames[:N]
16 |     for filename in val_filenames:
17 |         src_filepath = os.sep.join([root, filename])
18 |         dst_filepath = os.sep.join(['val', class_dir, filename])
19 |         cmd = 'mv {} {}'.format(src_filepath, dst_filepath)
20 |         os.system(cmd)
21 | 
--------------------------------------------------------------------------------
/chap10/download_resnet10_cvgj_weights.sh:
--------------------------------------------------------------------------------
1 | wget https://upload.uni-jena.de/data/58493041de6f79.63214979/resnet10_cvgj_iter_320000.caffemodel
2 | 
3 | 
--------------------------------------------------------------------------------
/chap10/kaoya_shuizhurou_roc_auc.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from sklearn.metrics import confusion_matrix, roc_curve, auc
3 | 
4 | result_filepath = 'val_results.txt'
5 | 
6 | # the indices of ky & szr are 0 and 2, respectively
7 | is_ky = []
8 | pred_ky = []
9 | is_szr = []
10 | pred_szr = []
11 | ky_scores = []
12 | szr_scores = []
13 | with open(result_filepath, 'r') as f:
14 |     lines = f.readlines()
15 |     for line in lines:
16 |         tokens = line.split()
17 |         true_label = int(tokens[1])
18 |         pred_label = int(tokens[2])
19 |         ky_prob = float(tokens[3])
20 |         szr_prob = float(tokens[5])
21 | 
22 |         is_ky.append(1 if true_label == 0 else 0)
23 |         pred_ky.append(1 if pred_label == 0 else 0)
24 |         ky_scores.append(ky_prob)
25 | 
26 |         is_szr.append(1 if true_label == 2 else 0)
27 |         szr_scores.append(szr_prob)
28 | 
29 | ky_cnf_mat = confusion_matrix(is_ky, pred_ky, labels=[1, 0])
30 | print(ky_cnf_mat)
31 | 
32 | ky_fpr, ky_tpr, ky_ths = roc_curve(is_ky, ky_scores)
33 | ky_auc = auc(ky_fpr, ky_tpr)
34 | 
35 | szr_fpr, szr_tpr, szr_ths = roc_curve(is_szr, szr_scores)
36 | szr_auc = auc(szr_fpr, szr_tpr)
37 | 
38 | plt.plot(ky_fpr, ky_tpr, 'k--', lw=2,
39 |          label='Kao Ya ROC curve (auc = {:.2f})'.format(ky_auc))
40 | plt.plot(szr_fpr, szr_tpr, 'b-.', lw=2,
41 |          label='Shui Zhu Rou ROC curve (auc = {:.2f})'.format(szr_auc))
42 | plt.plot([0, 1], [0, 1], 'k', lw=1)
43 | plt.plot([0, 0, 1], [0, 1, 1], 'k:', lw=2)
44 | plt.xlim([-0.02, 1.0])
45 | plt.ylim([0.0, 1.02])
46 | plt.xlabel('False Positive Rate', fontsize=16)
47 | plt.ylabel('True Positive Rate', fontsize=16)
48 | plt.title('Receiver operating characteristic example')
49 | plt.legend(loc="lower right")
50 | plt.show()
51 | 
--------------------------------------------------------------------------------
/chap10/make_confusion_matrix.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from sklearn.metrics import confusion_matrix
5 | 
6 | def plot_confusion_matrix(cm, classes,
7 |                           normalize=False,
8 |                           title='Confusion matrix',
9 |                           cmap=plt.cm.Blues):
10 | 
11 |     # normalize before drawing, so the heatmap matches the printed values
12 |     if normalize:
13 |         cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
14 |         print("Normalized confusion matrix")
15 |     else:
16 |         print('Confusion matrix, without normalization')
17 | 
18 |     print(cm)
19 | 
20 |     plt.imshow(cm, interpolation='nearest', cmap=cmap)
21 |     plt.title(title)
22 |     plt.colorbar()
23 |     tick_marks = np.arange(len(classes))
24 |     plt.xticks(tick_marks, classes, rotation=45)
25 |     plt.yticks(tick_marks, classes)
26 | 
27 |     thresh = cm.max() / 2.
28 |     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
29 |         plt.text(j, i, cm[i, j],
30 |                  horizontalalignment="center",
31 |                  color="white" if cm[i, j] > thresh else "black")
32 | 
33 |     plt.tight_layout()
34 |     plt.ylabel('True label')
35 |     plt.xlabel('Predicted label')
36 | 
37 | result_filepath = 'val_results.txt'
38 | 
39 | true_labels = []
40 | pred_labels = []
41 | n_correct = 0
42 | with open(result_filepath, 'r') as f:
43 |     lines = f.readlines()
44 |     for line in lines:
45 |         tokens = line.split()
46 |         true_label = int(tokens[1])
47 |         pred_label = int(tokens[2])
48 |         true_labels.append(true_label)
49 |         pred_labels.append(pred_label)
50 |         n_correct += 1 if true_label == pred_label else 0
51 | 
52 | print('Accuracy = {:.2f}%'.format(float(n_correct)/float(len(true_labels))*100))
53 | cnf_mat = confusion_matrix(true_labels, pred_labels)
54 | foods = ['kaoya', 'yangrouchuan', 'shuizhurou', 'jitang', 'maxiao', 'miantiao', 'baozi']
55 | plot_confusion_matrix(cnf_mat, classes=foods)
56 | plt.show()
57 | 
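58 | # A row-normalized view of the same matrix (editor's sketch, using the
59 | # function's own normalize flag; uncomment to try):
60 | #plot_confusion_matrix(cnf_mat, classes=foods, normalize=True,
61 | #                      title='Normalized confusion matrix')
62 | #plt.show()
--------------------------------------------------------------------------------
/chap10/recognize_food.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import numpy as np
3 | sys.path.append('/path/to/caffe/python')
4 | import caffe
5 | 
6 | WEIGHTS_FILE = 'food_resnet-10_iter_10000.caffemodel'
7 | DEPLOY_FILE = 'food_resnet_10_cvgj_deploy.prototxt'
8 | 
9 | #caffe.set_mode_cpu()
10 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST)
11 | 
12 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
13 | transformer.set_transpose('data', (2, 0, 1))
14 | transformer.set_raw_scale('data', 255)
15 | transformer.set_channel_swap('data', (2, 1, 0))
16 | 
17 | image_list = sys.argv[1]
18 | result_list = '{}_results.txt'.format(image_list[:image_list.rfind('.')])
19 | 
20 | foods = open('/path/to/keywords.txt', 'rb').read().split()
21 | with open(image_list, 'r') as f, open(result_list, 'w') as f_ret:
22 |     for line in f.readlines():
23 |         filepath, label = line.split()
24 |         label = int(label)
25 |         image = caffe.io.load_image(filepath)
26 |         transformed_image = transformer.preprocess('data', image)
27 |         net.blobs['data'].data[...]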
= transformed_image 28 | 29 | output = net.forward() 30 | probs = output['prob'][0] 31 | pred = np.argmax(probs) 32 | 33 | print('{}, predicted: {}, true: {}'.format(filepath, foods[pred], foods[label])) 34 | result_line = '{} {} {} {}\n'.format(filepath, label, pred, ' '.join([str(x) for x in probs])) 35 | f_ret.write(result_line) 36 | -------------------------------------------------------------------------------- /chap10/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "food_resnet_10_cvgj_finetune_val.prototxt" 2 | test_iter: 300 3 | test_interval: 1000 4 | base_lr: 0.1 5 | display: 100 6 | max_iter: 20000 7 | lr_policy: "fixed" 8 | momentum: 0.9 9 | delta: 1e-8 10 | weight_decay: 0.0001 11 | snapshot: 10000 12 | snapshot_prefix: "food_resnet-10" 13 | test_initialization: false 14 | solver_mode: GPU 15 | type: "AdaDelta" 16 | -------------------------------------------------------------------------------- /chap10/sort_kaoya_by_pred_prob.py: -------------------------------------------------------------------------------- 1 | from operator import itemgetter 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.metrics import precision_recall_curve, average_precision_score 5 | 6 | result_filepath = 'val_results.txt' 7 | 8 | ky_probs = [] 9 | with open(result_filepath, 'r') as f: 10 | lines = f.readlines() 11 | for line in lines: 12 | tokens = line.split() 13 | true_label = int(tokens[1]) 14 | is_ky = 1 if true_label == 0 else 0 15 | ky_prob = float(tokens[3]) 16 | ky_probs.append([is_ky, ky_prob]) 17 | 18 | ky_probs_sorted = np.array(sorted(ky_probs, key=itemgetter(1), reverse=True)) 19 | for is_ky, ky_prob in ky_probs_sorted: 20 | print('{:.0f} {:.6f}'.format(is_ky, ky_prob)) 21 | 22 | labels = ky_probs_sorted[:, 0] 23 | probs = ky_probs_sorted[:, 1] 24 | 25 | precision, recall, ths = precision_recall_curve(labels, probs) 26 | ap = average_precision_score(labels, probs) 27 | 28 | plt.figure('Kao Ya Precision-Recall Curve') 29 | plt.plot(recall, precision, 'k', lw=2, label='Kao Ya') 30 | plt.xlabel('Recall', fontsize=16) 31 | plt.ylabel('Precision', fontsize=16) 32 | plt.ylim([0.0, 1.05]) 33 | plt.xlim([0.0, 1.0]) 34 | plt.title('Precision-Recall Curve: Average Precision={:.4f}'.format(ap)) 35 | plt.legend(loc="lower left") 36 | plt.show() 37 | 38 | -------------------------------------------------------------------------------- /chap10/visualize_activation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import cv2 5 | sys.path.append('/path/to/caffe/python') 6 | import caffe 7 | 8 | WEIGHTS_FILE = 'food_resnet-10_iter_10000.caffemodel' 9 | DEPLOY_FILE = 'food_resnet_10_cvgj_deploy.prototxt' 10 | FEATURE_MAPS = 'layer_512_1_sum' 11 | FC_LAYER = 'fc_food' 12 | 13 | #caffe.set_mode_cpu() 14 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST) 15 | 16 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) 17 | transformer.set_transpose('data', (2, 0, 1)) 18 | transformer.set_raw_scale('data', 255) 19 | transformer.set_channel_swap('data', (2, 1, 0)) 20 | 21 | image_list = sys.argv[1] 22 | 23 | cmap = plt.get_cmap('jet') 24 | with open(image_list, 'r') as f: 25 | for line in f.readlines(): 26 | filepath = line.split()[0] 27 | image = caffe.io.load_image(filepath) 28 | # uncomment the following 2 lines to forward with 29 | # original image size and corresponding activation 
maps
30 |         #transformer.inputs['data'] = (1, 3, image.shape[0], image.shape[1])
31 |         #net.blobs['data'].reshape(1, 3, image.shape[0], image.shape[1])
32 |         transformed_image = transformer.preprocess('data', image)
33 |         net.blobs['data'].data[...] = transformed_image
34 | 
35 |         output = net.forward()
36 |         pred = np.argmax(output['prob'][0])
37 | 
38 |         feature_maps = net.blobs[FEATURE_MAPS].data[0]
39 |         fc_params = net.params[FC_LAYER]
40 |         fc_w = fc_params[0].data[pred]
41 |         #fc_b = fc_params[1].data[pred]
42 | 
43 |         activation_map = np.zeros_like(feature_maps[0])
44 |         for feature_map, w in zip(feature_maps, fc_w):
45 |             activation_map += feature_map * w
46 |         #activation_map += fc_b
47 | 
48 |         # Visualize as
49 |         # left: original image
50 |         # middle: activation map
51 |         # right: original image overlaid with activation map in 'jet' colormap
52 |         image = np.round(image*255).astype(np.uint8)
53 |         h, w = image.shape[:2]
54 |         activation_map = cv2.resize(activation_map, (w, h), interpolation=cv2.INTER_CUBIC)
55 |         activation_map -= activation_map.min()
56 |         activation_map /= activation_map.max()
57 |         activation_color_map = np.round(cmap(activation_map)[:, :, :3]*255).astype(np.uint8)
58 |         activation_map = np.stack(np.round([activation_map*255]*3).astype(np.uint8))
59 |         activation_map = activation_map.transpose(1, 2, 0)
60 |         overlay_img = image/2 + activation_color_map/2
61 |         vis_img = np.hstack([image, activation_map, overlay_img])
62 |         vis_img = cv2.cvtColor(vis_img, cv2.COLOR_RGB2BGR)
63 | 
64 |         cv2.imshow('Activation Map Visualization', vis_img)
65 |         cv2.waitKey()
66 | 
--------------------------------------------------------------------------------
/chap11/README.md:
--------------------------------------------------------------------------------
1 | # Chapter 11: Object Detection
2 | 
3 | ## Key reference links
4 | 
5 | ### 11.1.1 The Sliding Window approach
6 | Mitosis Detection in Breast Cancer Histology Images
7 | with Deep Neural Networks
8 | http://people.idsia.ch/~ciresan/data/miccai2013.pdf
9 | 
10 | 
11 | ### 11.1.3 Selective Search and R-CNN
12 | Selective Search for Object Recognition
13 | https://ivi.fnwi.uva.nl/isis/publications/2013/UijlingsIJCV2013/UijlingsIJCV2013.pdf
14 | 
15 | R-CNN
16 | http://www.cv-foundation.org/openaccess/content_cvpr_2014/papers/Girshick_Rich_Feature_Hierarchies_2014_CVPR_paper.pdf
17 | 
18 | ### 11.1.4 SPP, ROI Pooling, and Fast R-CNN
19 | 
20 | SPP
21 | https://arxiv.org/pdf/1406.4729v1.pdf
22 | 
23 | Fast R-CNN
24 | http://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Girshick_Fast_R-CNN_ICCV_2015_paper.pdf
25 | 
26 | ### 11.1.5 RPN and Faster R-CNN
27 | Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
28 | http://papers.nips.cc/paper/5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks.pdf
29 | 
30 | ### 11.1.6 YOLO/SSD
31 | You Only Look Once: Unified, Real-Time Object Detection
32 | http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Redmon_You_Only_Look_CVPR_2016_paper.pdf
33 | 
34 | SSD: Single Shot MultiBox Detector
35 | https://arxiv.org/pdf/1512.02325v5.pdf
36 | 
37 | ## Pretrained model download links
38 | vgg16_reduced.zip & ssd_300_vgg16_reduced_voc0712_trainval.zip
39 | http://pan.baidu.com/s/1sli1TE1
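40 | 
41 | All of the detectors above are matched against ground truth by intersection over union (IoU). As a quick reference (an editor's sketch, not code from the book), for boxes given as [x0, y0, x1, y1]:
42 | 
43 | ```python
44 | def iou(box_a, box_b):
45 |     # overlap rectangle (empty overlaps clamp to zero area)
46 |     x0, y0 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
47 |     x1, y1 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
48 |     inter = max(0, x1 - x0) * max(0, y1 - y0)
49 |     area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
50 |     area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
51 |     return float(inter) / (area_a + area_b - inter)
52 | ```
--------------------------------------------------------------------------------
/chap11/prepare_voc_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
3 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar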
4 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
5 | tar -xvf VOCtrainval_11-May-2012.tar
6 | tar -xvf VOCtrainval_06-Nov-2007.tar
7 | tar -xvf VOCtest_06-Nov-2007.tar
8 | 
--------------------------------------------------------------------------------
/chap12/README.md:
--------------------------------------------------------------------------------
1 | ## Metric Learning with Siamese Network
2 | ### step 1
3 | Generate the images following the MNIST data preparation in Chapter 8, then run
4 | > ln -s /path/to/mnist mnist
5 | 
6 | to link the directory that holds the MNIST images under the current directory.
7 | 
8 | ### step 2
9 | > python gen_pairwise_imglist.py
10 | 
11 | generates the paired image lists.
12 | 
13 | ### step 3
14 | > /path/to/caffe/build/tools/convert_imageset ./ train.txt train_lmdb --gray
15 | > /path/to/caffe/build/tools/convert_imageset ./ train_p.txt train_p_lmdb --gray
16 | > /path/to/caffe/build/tools/convert_imageset ./ val.txt val_lmdb --gray
17 | > /path/to/caffe/build/tools/convert_imageset ./ val_p.txt val_p_lmdb --gray
18 | 
19 | generate the lmdbs.
20 | 
21 | ### step 4
22 | > /path/to/caffe/build/tools/caffe train -solver mnist_siamese_solver.prototxt -log_dir ./
23 | 
24 | trains the model.
25 | 
26 | ### step 5
27 | 
28 | > python visualize_result.py
29 | 
30 | visualizes the results.
31 | 
32 | ## Pretrained model download links
33 | https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/mnist_siamese_iter_20000.caffemodel
34 | or
35 | http://pan.baidu.com/s/1qYk5MDQ
--------------------------------------------------------------------------------
/chap12/gen_pairwise_imglist.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import re
4 | 
5 | train_dir = 'mnist/train'
6 | val_dir = 'mnist/val'
7 | n_train = 100000
8 | n_val = 10000
9 | 
10 | pattern = re.compile('\d+_(\d)\.jpg')
11 | 
12 | for img_dir, n_pairs in zip([train_dir, val_dir], [n_train, n_val]):
13 |     imglist = os.listdir(img_dir)
14 |     n_samples = len(imglist)
15 |     dataset = img_dir[img_dir.rfind(os.sep)+1:]
16 |     with open('{}.txt'.format(dataset), 'w') as f, \
17 |          open('{}_p.txt'.format(dataset), 'w') as f_p:
18 |         for i in range(n_pairs):
19 |             filename = imglist[random.randint(0, n_samples-1)]
20 |             digit = pattern.findall(filename)[0]
21 |             filepath = os.sep.join([img_dir, filename])
22 | 
23 |             filename_p = imglist[random.randint(0, n_samples-1)]
24 |             digit_p = pattern.findall(filename_p)[0]
25 |             filepath_p = os.sep.join([img_dir, filename_p])
26 | 
27 |             label = 1 if digit == digit_p else 0
28 | 
29 |             f.write('{} {}\n'.format(filepath, label))
30 |             f_p.write('{} {}\n'.format(filepath_p, label))
--------------------------------------------------------------------------------
/chap12/mnist_siamese.prototxt:
--------------------------------------------------------------------------------
1 | name: "mnist_siamese"
2 | layer {
3 |   name: "data"
4 |   type: "Input"
5 |   top: "data"
6 |   input_param {
7 |     shape: { dim: 10000 dim: 1 dim: 28 dim: 28 }
8 |   }
9 | }
10 | layer {
11 |   name: "conv1"
12 |   type: "Convolution"
13 |   bottom: "data"
14 |   top: "conv1"
15 |   param {
16 |     lr_mult: 1
17 |   }
18 |   param {
19 |     lr_mult: 2
20 |   }
21 |   convolution_param {
22 |     num_output: 20
23 |     kernel_size: 5
24 |     stride: 1
25 |   }
26 | }
27 | layer {
28 |   name: "pool1"
29 |   type: "Pooling"
30 |   bottom: "conv1"
31 |   top: "pool1"
32 |   pooling_param {
33 |     pool: MAX
34 |     kernel_size: 2
35 |     stride: 2
36 |   }
37 | }
38 | layer {
39 |   name: "conv2"
40 |   type: "Convolution"
41 |   bottom: "pool1"
42 |   top: "conv2"
43 |   param {
44 | 
lr_mult: 1 45 | } 46 | param { 47 | lr_mult: 2 48 | } 49 | convolution_param { 50 | num_output: 50 51 | kernel_size: 5 52 | stride: 1 53 | } 54 | } 55 | layer { 56 | name: "pool2" 57 | type: "Pooling" 58 | bottom: "conv2" 59 | top: "pool2" 60 | pooling_param { 61 | pool: MAX 62 | kernel_size: 2 63 | stride: 2 64 | } 65 | } 66 | layer { 67 | name: "ip1" 68 | type: "InnerProduct" 69 | bottom: "pool2" 70 | top: "ip1" 71 | param { 72 | lr_mult: 1 73 | } 74 | param { 75 | lr_mult: 2 76 | } 77 | inner_product_param { 78 | num_output: 500 79 | } 80 | } 81 | layer { 82 | name: "relu1" 83 | type: "ReLU" 84 | bottom: "ip1" 85 | top: "ip1" 86 | } 87 | layer { 88 | name: "ip2" 89 | type: "InnerProduct" 90 | bottom: "ip1" 91 | top: "ip2" 92 | param { 93 | lr_mult: 1 94 | } 95 | param { 96 | lr_mult: 2 97 | } 98 | inner_product_param { 99 | num_output: 10 100 | } 101 | } 102 | layer { 103 | name: "feat" 104 | type: "InnerProduct" 105 | bottom: "ip2" 106 | top: "feat" 107 | param { 108 | lr_mult: 1 109 | } 110 | param { 111 | lr_mult: 2 112 | } 113 | inner_product_param { 114 | num_output: 2 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /chap12/mnist_siamese_solver.prototxt: -------------------------------------------------------------------------------- 1 | # The train/test net protocol buffer definition 2 | net: "mnist_siamese_train_val.prototxt" 3 | # test_iter specifies how many forward passes the test should carry out. 4 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 5 | # covering the full 10,000 testing images. 6 | test_iter: 100 7 | # Carry out testing every 500 training iterations. 8 | test_interval: 500 9 | # The base learning rate, momentum and the weight decay of the network. 
10 | base_lr: 0.01 11 | momentum: 0.9 12 | weight_decay: 0.0000 13 | # The learning rate policy 14 | lr_policy: "inv" 15 | gamma: 0.0001 16 | power: 0.75 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 50000 21 | # snapshot intermediate results 22 | snapshot: 5000 23 | snapshot_prefix: "mnist_siamese" 24 | # solver mode: CPU or GPU 25 | solver_mode: GPU 26 | -------------------------------------------------------------------------------- /chap12/mnist_siamese_train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "mnist_siamese_train_test" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | data_param { 15 | source: "train_lmdb" 16 | batch_size: 64 17 | backend: LMDB 18 | } 19 | } 20 | layer { 21 | name: "mnist_p" 22 | type: "Data" 23 | top: "data_p" 24 | include { 25 | phase: TRAIN 26 | } 27 | transform_param { 28 | mean_value: 128 29 | scale: 0.00390625 30 | } 31 | data_param { 32 | source: "train_p_lmdb" 33 | batch_size: 64 34 | backend: LMDB 35 | } 36 | } 37 | layer { 38 | name: "mnist" 39 | type: "Data" 40 | top: "data" 41 | top: "label" 42 | include { 43 | phase: TEST 44 | } 45 | transform_param { 46 | mean_value: 128 47 | scale: 0.00390625 48 | } 49 | data_param { 50 | source: "val_lmdb" 51 | batch_size: 100 52 | backend: LMDB 53 | } 54 | } 55 | layer { 56 | name: "mnist_p" 57 | type: "Data" 58 | top: "data_p" 59 | include { 60 | phase: TEST 61 | } 62 | transform_param { 63 | mean_value: 128 64 | scale: 0.00390625 65 | } 66 | data_param { 67 | source: "val_p_lmdb" 68 | batch_size: 100 69 | backend: LMDB 70 | } 71 | } 72 | layer { 73 | name: "conv1" 74 | type: "Convolution" 75 | bottom: "data" 76 | top: "conv1" 77 | param { 78 | name: "conv1_w" 79 | lr_mult: 1 80 | } 81 | param { 82 | name: "conv1_b" 83 | lr_mult: 2 84 | } 85 | convolution_param { 86 | num_output: 20 87 | kernel_size: 5 88 | stride: 1 89 | weight_filler { 90 | type: "xavier" 91 | } 92 | bias_filler { 93 | type: "constant" 94 | } 95 | } 96 | } 97 | layer { 98 | name: "pool1" 99 | type: "Pooling" 100 | bottom: "conv1" 101 | top: "pool1" 102 | pooling_param { 103 | pool: MAX 104 | kernel_size: 2 105 | stride: 2 106 | } 107 | } 108 | layer { 109 | name: "conv2" 110 | type: "Convolution" 111 | bottom: "pool1" 112 | top: "conv2" 113 | param { 114 | name: "conv2_w" 115 | lr_mult: 1 116 | } 117 | param { 118 | name: "conv2_b" 119 | lr_mult: 2 120 | } 121 | convolution_param { 122 | num_output: 50 123 | kernel_size: 5 124 | stride: 1 125 | weight_filler { 126 | type: "xavier" 127 | } 128 | bias_filler { 129 | type: "constant" 130 | } 131 | } 132 | } 133 | layer { 134 | name: "pool2" 135 | type: "Pooling" 136 | bottom: "conv2" 137 | top: "pool2" 138 | pooling_param { 139 | pool: MAX 140 | kernel_size: 2 141 | stride: 2 142 | } 143 | } 144 | layer { 145 | name: "ip1" 146 | type: "InnerProduct" 147 | bottom: "pool2" 148 | top: "ip1" 149 | param { 150 | name: "ip1_w" 151 | lr_mult: 1 152 | } 153 | param { 154 | name: "ip1_b" 155 | lr_mult: 2 156 | } 157 | inner_product_param { 158 | num_output: 500 159 | weight_filler { 160 | type: "xavier" 161 | } 162 | bias_filler { 163 | type: "constant" 164 | } 165 | } 166 | } 167 | layer { 168 | name: "relu1" 169 | type: "ReLU" 170 | bottom: "ip1" 171 | top: "ip1" 172 | } 173 | layer { 174 | name: "ip2" 175 | type: "InnerProduct" 176 | 
bottom: "ip1" 177 | top: "ip2" 178 | param { 179 | name: "ip2_w" 180 | lr_mult: 1 181 | } 182 | param { 183 | name: "ip2_b" 184 | lr_mult: 2 185 | } 186 | inner_product_param { 187 | num_output: 10 188 | weight_filler { 189 | type: "xavier" 190 | } 191 | bias_filler { 192 | type: "constant" 193 | } 194 | } 195 | } 196 | layer { 197 | name: "feat" 198 | type: "InnerProduct" 199 | bottom: "ip2" 200 | top: "feat" 201 | param { 202 | name: "feat_w" 203 | lr_mult: 1 204 | } 205 | param { 206 | name: "feat_b" 207 | lr_mult: 2 208 | } 209 | inner_product_param { 210 | num_output: 2 211 | weight_filler { 212 | type: "xavier" 213 | } 214 | bias_filler { 215 | type: "constant" 216 | } 217 | } 218 | } 219 | layer { 220 | name: "conv1_p" 221 | type: "Convolution" 222 | bottom: "data_p" 223 | top: "conv1_p" 224 | param { 225 | name: "conv1_w" 226 | lr_mult: 1 227 | } 228 | param { 229 | name: "conv1_b" 230 | lr_mult: 2 231 | } 232 | convolution_param { 233 | num_output: 20 234 | kernel_size: 5 235 | stride: 1 236 | weight_filler { 237 | type: "xavier" 238 | } 239 | bias_filler { 240 | type: "constant" 241 | } 242 | } 243 | } 244 | layer { 245 | name: "pool1_p" 246 | type: "Pooling" 247 | bottom: "conv1_p" 248 | top: "pool1_p" 249 | pooling_param { 250 | pool: MAX 251 | kernel_size: 2 252 | stride: 2 253 | } 254 | } 255 | layer { 256 | name: "conv2_p" 257 | type: "Convolution" 258 | bottom: "pool1_p" 259 | top: "conv2_p" 260 | param { 261 | name: "conv2_w" 262 | lr_mult: 1 263 | } 264 | param { 265 | name: "conv2_b" 266 | lr_mult: 2 267 | } 268 | convolution_param { 269 | num_output: 50 270 | kernel_size: 5 271 | stride: 1 272 | weight_filler { 273 | type: "xavier" 274 | } 275 | bias_filler { 276 | type: "constant" 277 | } 278 | } 279 | } 280 | layer { 281 | name: "pool2_p" 282 | type: "Pooling" 283 | bottom: "conv2_p" 284 | top: "pool2_p" 285 | pooling_param { 286 | pool: MAX 287 | kernel_size: 2 288 | stride: 2 289 | } 290 | } 291 | layer { 292 | name: "ip1_p" 293 | type: "InnerProduct" 294 | bottom: "pool2_p" 295 | top: "ip1_p" 296 | param { 297 | name: "ip1_w" 298 | lr_mult: 1 299 | } 300 | param { 301 | name: "ip1_b" 302 | lr_mult: 2 303 | } 304 | inner_product_param { 305 | num_output: 500 306 | weight_filler { 307 | type: "xavier" 308 | } 309 | bias_filler { 310 | type: "constant" 311 | } 312 | } 313 | } 314 | layer { 315 | name: "relu1_p" 316 | type: "ReLU" 317 | bottom: "ip1_p" 318 | top: "ip1_p" 319 | } 320 | layer { 321 | name: "ip2_p" 322 | type: "InnerProduct" 323 | bottom: "ip1_p" 324 | top: "ip2_p" 325 | param { 326 | name: "ip2_w" 327 | lr_mult: 1 328 | } 329 | param { 330 | name: "ip2_b" 331 | lr_mult: 2 332 | } 333 | inner_product_param { 334 | num_output: 10 335 | weight_filler { 336 | type: "xavier" 337 | } 338 | bias_filler { 339 | type: "constant" 340 | } 341 | } 342 | } 343 | layer { 344 | name: "feat_p" 345 | type: "InnerProduct" 346 | bottom: "ip2_p" 347 | top: "feat_p" 348 | param { 349 | name: "feat_w" 350 | lr_mult: 1 351 | } 352 | param { 353 | name: "feat_b" 354 | lr_mult: 2 355 | } 356 | inner_product_param { 357 | num_output: 2 358 | weight_filler { 359 | type: "xavier" 360 | } 361 | bias_filler { 362 | type: "constant" 363 | } 364 | } 365 | } 366 | layer { 367 | name: "loss" 368 | type: "ContrastiveLoss" 369 | bottom: "feat" 370 | bottom: "feat_p" 371 | bottom: "label" 372 | top: "loss" 373 | contrastive_loss_param { 374 | margin: 1 375 | } 376 | } 377 | -------------------------------------------------------------------------------- /chap12/visualize_result.py: 
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append('/path/to/caffe/python')
4 | import re
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | from sklearn.manifold import TSNE
8 | import cv2
9 | import caffe
10 | 
11 | WEIGHTS_FILE = 'mnist_siamese_iter_20000.caffemodel'
12 | DEPLOY_FILE = 'mnist_siamese.prototxt'
13 | IMG_DIR = 'mnist/test'
14 | MEAN = 128
15 | SCALE = 0.00390625
16 | 
17 | caffe.set_mode_gpu()
18 | caffe.set_device(0)
19 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST)
20 | 
21 | pattern = re.compile('\d+_(\d)\.jpg')
22 | 
23 | image_list = os.listdir(IMG_DIR)
24 | n_imgs = len(image_list)
25 | 
26 | net.blobs['data'].reshape(n_imgs, 1, 28, 28)
27 | 
28 | labels = []
29 | for i, filename in enumerate(image_list):
30 |     digit = int(pattern.findall(filename)[0])
31 |     labels.append(digit)
32 |     filepath = os.sep.join([IMG_DIR, filename])
33 |     image = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE).astype(np.float) - MEAN
34 |     image *= SCALE
35 |     net.blobs['data'].data[i, ...] = image
36 | 
37 | labels = np.array(labels)
38 | 
39 | output = net.forward()
40 | feat = output['feat']
41 | 
42 | colors = ['#ff0000', '#ffff00', '#00ff00', '#00ffff', '#0000ff',
43 |           '#ff00ff', '#990000', '#999900', '#009900', '#009999']
44 | legend = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
45 | 
46 | plt.figure('feat')
47 | for i in range(10):
48 |     plt.plot(feat[labels==i,0].flatten(),
49 |              feat[labels==i,1].flatten(),
50 |              '.', c=colors[i])
51 | plt.legend(legend)
52 | 
53 | plt.figure('ip2')
54 | ip2_feat = net.blobs['ip2'].data
55 | model = TSNE(n_components=2)
56 | 
57 | ip2_vis_feat = model.fit_transform(ip2_feat)
58 | for i in range(10):
59 |     plt.plot(ip2_vis_feat[labels==i,0].flatten(),
60 |              ip2_vis_feat[labels==i,1].flatten(),
61 |              '.', c=colors[i])
62 | plt.legend(legend)
63 | 
64 | plt.show()
65 | 
--------------------------------------------------------------------------------
/chap5/README.md:
--------------------------------------------------------------------------------
1 | ## Chapter 5 sample code
2 | 
3 | See Chapter 5 of the book, or the online versions:
4 | https://zhuanlan.zhihu.com/p/24162430
5 | https://zhuanlan.zhihu.com/p/24309547
6 | 
7 | 
--------------------------------------------------------------------------------
/chap5/bar_n_pie_chart.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib as mpl
3 | import matplotlib.pyplot as plt
4 | 
5 | mpl.rcParams['axes.titlesize'] = 20
6 | mpl.rcParams['xtick.labelsize'] = 16
7 | mpl.rcParams['ytick.labelsize'] = 16
8 | mpl.rcParams['axes.labelsize'] = 16
9 | mpl.rcParams['xtick.major.size'] = 0
10 | mpl.rcParams['ytick.major.size'] = 0
11 | 
12 | speed_map = {
13 |     'dog': (48, '#7199cf'),
14 |     'cat': (45, '#4fc4aa'),
15 |     'cheetah': (120, '#e1a7a2')
16 | }
17 | 
18 | fig = plt.figure('Bar chart & Pie chart')
19 | 
20 | ax = fig.add_subplot(121)
21 | ax.set_title('Running speed - bar chart')
22 | 
23 | xticks = np.arange(3)
24 | 
25 | bar_width = 0.5
26 | 
27 | animals = speed_map.keys()
28 | speeds = [x[0] for x in speed_map.values()]
29 | colors = [x[1] for x in speed_map.values()]
30 | bars = ax.bar(xticks, speeds, width=bar_width, edgecolor='none')
31 | 
32 | ax.set_ylabel('Speed(km/h)')
33 | ax.set_xticks(xticks+bar_width/2)
34 | ax.set_xticklabels(animals)
35 | ax.set_xlim([bar_width/2-0.5, 3-bar_width/2])
36 | ax.set_ylim([0, 125])
37 | 
38 | for bar, color in zip(bars, colors):
39 |     bar.set_color(color)
40 | 
41 | ax =
fig.add_subplot(122) 42 | ax.set_title('Running speed - pie chart') 43 | 44 | labels = ['{}\n{} km/h'.format(a, s) for a, s in zip(animals, speeds)] 45 | 46 | ax.pie(speeds, labels=labels, colors=colors) 47 | 48 | plt.axis('equal') 49 | plt.show() 50 | -------------------------------------------------------------------------------- /chap5/fit_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib as mpl 3 | import matplotlib.pyplot as plt 4 | 5 | mpl.rcParams['xtick.labelsize'] = 24 6 | mpl.rcParams['ytick.labelsize'] = 24 7 | 8 | np.random.seed(42) 9 | 10 | x = np.linspace(0, 5, 100) 11 | y = 2*np.sin(x) + 0.3*x**2 12 | y_data = y + np.random.normal(scale=0.3, size=100) 13 | 14 | plt.figure('data') 15 | plt.plot(x, y_data, '.') 16 | 17 | plt.figure('model') 18 | plt.plot(x, y) 19 | 20 | plt.figure('data & model') 21 | plt.plot(x, y, 'k', lw=3) 22 | plt.scatter(x, y_data) 23 | 24 | plt.show() 25 | -------------------------------------------------------------------------------- /chap5/scatter_3d.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from mpl_toolkits.mplot3d import Axes3D 5 | 6 | np.random.seed(42) 7 | 8 | n_samples = 500 9 | dim = 3 10 | 11 | samples = np.random.multivariate_normal( 12 | np.zeros(dim), 13 | np.eye(dim), 14 | n_samples 15 | ) 16 | 17 | for i in range(samples.shape[0]): 18 | r = np.power(np.random.random(), 1.0/3.0) 19 | samples[i] *= r / np.linalg.norm(samples[i]) 20 | 21 | upper_samples = [] 22 | lower_samples = [] 23 | for x, y, z in samples: 24 | if z > 3*x + 2*y - 1: 25 | upper_samples.append((x, y, z)) 26 | else: 27 | lower_samples.append((x, y, z)) 28 | 29 | fig = plt.figure('3D scatter plot') 30 | ax = fig.add_subplot(111, projection='3d') 31 | 32 | uppers = np.array(upper_samples) 33 | lowers = np.array(lower_samples) 34 | 35 | ax.scatter(uppers[:, 0], uppers[:, 1], uppers[:, 2], c='r', marker='o') 36 | ax.scatter(lowers[:, 0], lowers[:, 1], lowers[:, 2], c='g', marker='^') 37 | 38 | plt.show() 39 | -------------------------------------------------------------------------------- /chap5/surface_3d.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from mpl_toolkits.mplot3d import Axes3D 5 | 6 | np.random.seed(42) 7 | 8 | n_grids = 51 9 | c = n_grids / 2 10 | nf = 2 11 | 12 | x = np.linspace(0, 1, n_grids) 13 | y = np.linspace(0, 1, n_grids) 14 | X, Y = np.meshgrid(x, y) 15 | 16 | spectrum = np.zeros((n_grids, n_grids), dtype=np.complex) 17 | noise = [np.complex(x, y) for x, y in np.random.uniform(-1,1,((2*nf+1)**2/2, 2))] 18 | noisy_block = np.concatenate((noise, [0j], np.conjugate(noise[::-1]))) 19 | 20 | spectrum[c-nf:c+nf+1, c-nf:c+nf+1] = noisy_block.reshape((2*nf+1, 2*nf+1)) 21 | Z = np.real(np.fft.ifft2(np.fft.ifftshift(spectrum))) 22 | 23 | fig = plt.figure('3D surface & wire') 24 | 25 | ax = fig.add_subplot(1, 2, 1, projection='3d') 26 | ax.plot_surface(X, Y, Z, alpha=0.7, cmap='jet', rstride=1, cstride=1, lw=0) 27 | 28 | ax = fig.add_subplot(1, 2, 2, projection='3d') 29 | ax.plot_wireframe(X, Y, Z, rstride=3, cstride=3, lw=0.5) 30 | 31 | plt.show() 32 | -------------------------------------------------------------------------------- /chap5/three_doors.py: -------------------------------------------------------------------------------- 1 | import numpy.random as random 
2 | 
3 | random.seed(42)
4 | 
5 | n_tests = 10000
6 | 
7 | winning_doors = random.randint(0, 3, n_tests)
8 | change_mind_wins = 0
9 | insist_wins = 0
10 | 
11 | for winning_door in winning_doors:
12 | 
13 |     first_try = random.randint(0, 3)
14 |     remaining_choices = [i for i in range(3) if i != first_try]
15 |     wrong_choices = [i for i in range(3) if i != winning_door]
16 | 
17 |     if first_try in wrong_choices:
18 |         wrong_choices.remove(first_try)
19 | 
20 |     screened_out = random.choice(wrong_choices)
21 |     remaining_choices.remove(screened_out)
22 | 
23 |     changed_mind_try = remaining_choices[0]
24 | 
25 |     change_mind_wins += 1 if changed_mind_try == winning_door else 0
26 |     insist_wins += 1 if first_try == winning_door else 0
27 | 
28 | print(
29 |     'You win {1} out of {0} tests if you changed your mind\n'
30 |     'You win {2} out of {0} tests if you insist on the initial choice'.format(
31 |         n_tests, change_mind_wins, insist_wins
32 |     )
33 | )
34 | 
--------------------------------------------------------------------------------
/chap6/README.md:
--------------------------------------------------------------------------------
1 | ## Chapter 6 sample code
2 | Contains a parallel data-augmentation tool and a small bounding-box annotation tool.
3 | See Chapter 6 of the book, or the online version:
4 | https://zhuanlan.zhihu.com/p/24425116
5 | 
6 | The data-augmentation tool in the book balances the per-process workload manually; for an automatically balanced version based on a Python process pool, see run_augmentation_pool_map.py
7 | 
--------------------------------------------------------------------------------
/chap6/bbox_labeling/bbox_labeling.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | from tkFileDialog import askdirectory
4 | from tkMessageBox import askyesno
5 | 
6 | WINDOW_NAME = 'Simple Bounding Box Labeling Tool'
7 | FPS = 24
8 | SUPPORTED_FORMATS = ['jpg', 'jpeg', 'png']
9 | DEFAULT_COLOR = {'Object': (255, 0, 0)}
10 | COLOR_GRAY = (192, 192, 192)
11 | BAR_HEIGHT = 16
12 | 
13 | KEY_UP = 65362
14 | KEY_DOWN = 65364
15 | KEY_LEFT = 65361
16 | KEY_RIGHT = 65363
17 | KEY_ESC = 27
18 | KEY_DELETE = 65535
19 | KEY_EMPTY = 0
20 | 
21 | get_bbox_name = '{}.bbox'.format
22 | 
23 | 
24 | class SimpleBBoxLabeling:
25 | 
26 |     def __init__(self, data_dir, fps=FPS, window_name=None):
27 |         self._data_dir = data_dir
28 |         self.fps = fps
29 |         self.window_name = window_name if window_name else WINDOW_NAME
30 | 
31 |         self._pt0 = None
32 |         self._pt1 = None
33 |         self._drawing = False
34 |         self._cur_label = None
35 |         self._bboxes = []
36 | 
37 |         label_path = '{}.labels'.format(self._data_dir)
38 |         self.label_colors = DEFAULT_COLOR if not os.path.exists(label_path) else self.load_labels(label_path)
39 | 
40 |         imagefiles = [x for x in os.listdir(self._data_dir) if x[x.rfind('.') + 1:].lower() in SUPPORTED_FORMATS]
41 |         labeled = [x for x in imagefiles if os.path.exists(os.sep.join([self._data_dir, get_bbox_name(x)]))]
42 |         to_be_labeled = [x for x in imagefiles if x not in labeled]
43 | 
44 |         self._filelist = labeled + to_be_labeled
45 |         self._index = len(labeled)
46 |         if self._index > len(self._filelist) - 1:
47 |             self._index = len(self._filelist) - 1
48 | 
49 |     def _mouse_ops(self, event, x, y, flags, param):
50 | 
51 |         if event == cv2.EVENT_LBUTTONDOWN:
52 |             self._drawing = True
53 |             self._pt0 = (x, y)
54 | 
55 |         elif event == cv2.EVENT_LBUTTONUP:
56 |             self._drawing = False
57 |             self._pt1 = (x, y)
58 |             self._bboxes.append((self._cur_label, (self._pt0, self._pt1)))
59 | 
60 |         elif event == cv2.EVENT_MOUSEMOVE:
61 |             self._pt1 = (x, y)
62 | 
63 |         elif event == cv2.EVENT_RBUTTONUP:
64 |             if self._bboxes:
65 |                 self._bboxes.pop()
66 | 
67 |     def _clean_bbox(self):
68 |         self._pt0 = None
69 |         self._pt1 = None
70 |         self._drawing =
False 71 | self._bboxes = [] 72 | 73 | def _draw_bbox(self, img): 74 | 75 | h, w = img.shape[:2] 76 | canvas = cv2.copyMakeBorder(img, 0, BAR_HEIGHT, 0, 0, cv2.BORDER_CONSTANT, value=COLOR_GRAY) 77 | 78 | label_msg = '{}: {}, {}'.format(self._cur_label, self._pt0, self._pt1) \ 79 | if self._drawing \ 80 | else 'Current label: {}'.format(self._cur_label) 81 | msg = '{}/{}: {} | {}'.format(self._index + 1, len(self._filelist), self._filelist[self._index], label_msg) 82 | 83 | cv2.putText(canvas, msg, (1, h+12), 84 | cv2.FONT_HERSHEY_SIMPLEX, 85 | 0.5, (0, 0, 0), 1) 86 | for label, (bpt0, bpt1) in self._bboxes: 87 | label_color = self.label_colors[label] if label in self.label_colors else COLOR_GRAY 88 | cv2.rectangle(canvas, bpt0, bpt1, label_color, thickness=2) 89 | cv2.putText(canvas, label, (bpt0[0]+3, bpt0[1]+15), 90 | cv2.FONT_HERSHEY_SIMPLEX, 91 | 0.5, label_color, 2) 92 | if self._drawing: 93 | label_color = self.label_colors[self._cur_label] if self._cur_label in self.label_colors else COLOR_GRAY 94 | if self._pt1[0] >= self._pt0[0] and self._pt1[1] >= self._pt0[1]: 95 | cv2.rectangle(canvas, self._pt0, self._pt1, label_color, thickness=2) 96 | cv2.putText(canvas, self._cur_label, (self._pt0[0] + 3, self._pt0[1] + 15), 97 | cv2.FONT_HERSHEY_SIMPLEX, 98 | 0.5, label_color, 2) 99 | return canvas 100 | 101 | @staticmethod 102 | def export_bbox(filepath, bboxes): 103 | if bboxes: 104 | with open(filepath, 'w') as f: 105 | for bbox in bboxes: 106 | line = repr(bbox) + '\n' 107 | f.write(line) 108 | elif os.path.exists(filepath): 109 | os.remove(filepath) 110 | 111 | @staticmethod 112 | def load_bbox(filepath): 113 | bboxes = [] 114 | with open(filepath, 'r') as f: 115 | line = f.readline().rstrip() 116 | while line: 117 | bboxes.append(eval(line)) 118 | line = f.readline().rstrip() 119 | return bboxes 120 | 121 | @staticmethod 122 | def load_labels(filepath): 123 | label_colors = {} 124 | with open(filepath, 'r') as f: 125 | line = f.readline().rstrip() 126 | while line: 127 | label, color = eval(line) 128 | label_colors[label] = color 129 | line = f.readline().rstrip() 130 | return label_colors 131 | 132 | @staticmethod 133 | def load_sample(filepath): 134 | img = cv2.imread(filepath) 135 | bbox_filepath = get_bbox_name(filepath) 136 | bboxes = [] 137 | if os.path.exists(bbox_filepath): 138 | bboxes = SimpleBBoxLabeling.load_bbox(bbox_filepath) 139 | return img, bboxes 140 | 141 | def _export_n_clean_bbox(self): 142 | bbox_filepath = os.sep.join([self._data_dir, get_bbox_name(self._filelist[self._index])]) 143 | self.export_bbox(bbox_filepath, self._bboxes) 144 | self._clean_bbox() 145 | 146 | def _delete_current_sample(self): 147 | filename = self._filelist[self._index] 148 | filepath = os.sep.join([self._data_dir, filename]) 149 | if os.path.exists(filepath): 150 | os.remove(filepath) 151 | filepath = get_bbox_name(filepath) 152 | if os.path.exists(filepath): 153 | os.remove(filepath) 154 | self._filelist.pop(self._index) 155 | print('{} is deleted!'.format(filename)) 156 | 157 | def start(self): 158 | 159 | last_filename = '' 160 | label_index = 0 161 | labels = self.label_colors.keys() 162 | n_labels = len(labels) 163 | 164 | cv2.namedWindow(self.window_name) 165 | cv2.setMouseCallback(self.window_name, self._mouse_ops) 166 | key = KEY_EMPTY 167 | delay = int(1000 / FPS) 168 | 169 | while key != KEY_ESC: 170 | 171 | if key == KEY_UP: 172 | if label_index == 0: 173 | pass 174 | else: 175 | label_index -= 1 176 | 177 | elif key == KEY_DOWN: 178 | if label_index == n_labels - 1: 179 | 
pass 180 | else: 181 | label_index += 1 182 | 183 | elif key == KEY_LEFT: 184 | if self._index > 0: 185 | self._export_n_clean_bbox() 186 | 187 | self._index -= 1 188 | if self._index < 0: 189 | self._index = 0 190 | 191 | elif key == KEY_RIGHT: 192 | if self._index < len(self._filelist) - 1: 193 | self._export_n_clean_bbox() 194 | 195 | self._index += 1 196 | if self._index > len(self._filelist) - 1: 197 | self._index = len(self._filelist) - 1 198 | 199 | elif key == KEY_DELETE: 200 | if askyesno('Delete Sample', 'Are you sure?'): 201 | self._delete_current_sample() 202 | key = KEY_EMPTY 203 | continue 204 | 205 | filename = self._filelist[self._index] 206 | if filename != last_filename: 207 | filepath = os.sep.join([self._data_dir, filename]) 208 | img, self._bboxes = self.load_sample(filepath) 209 | 210 | self._cur_label = labels[label_index] 211 | 212 | canvas = self._draw_bbox(img) 213 | cv2.imshow(self.window_name, canvas) 214 | key = cv2.waitKey(delay) 215 | 216 | last_filename = filename 217 | 218 | print('Finished!') 219 | 220 | cv2.destroyAllWindows() 221 | self.export_bbox(os.sep.join([self._data_dir, get_bbox_name(filename)]), self._bboxes) 222 | 223 | print('Labels updated!') 224 | 225 | if __name__ == '__main__': 226 | dir_with_images = askdirectory(title='Where are the images?') 227 | labeling_task = SimpleBBoxLabeling(dir_with_images) 228 | labeling_task.start() 229 | -------------------------------------------------------------------------------- /chap6/bbox_labeling/detection_anno_bbox2voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import xml.etree.ElementTree as ET 4 | #import xml.dom.minidom as minidom 5 | import cv2 6 | from bbox_labeling import SimpleBBoxLabeling 7 | 8 | input_dir = sys.argv[1].rstrip(os.sep) 9 | 10 | bbox_filenames = [x for x in os.listdir(input_dir) if x.endswith('.bbox')] 11 | 12 | for bbox_filename in bbox_filenames: 13 | bbox_filepath = os.sep.join([input_dir, bbox_filename]) 14 | jpg_filepath = bbox_filepath[:-5] 15 | if not os.path.exists(jpg_filepath): 16 | print('Something is wrong with {}!'.format(bbox_filepath)) 17 | break 18 | 19 | root = ET.Element('annotation') 20 | 21 | filename = ET.SubElement(root, 'filename') 22 | jpg_filename = jpg_filepath.split(os.sep)[-1] 23 | filename.text = jpg_filename 24 | 25 | img = cv2.imread(jpg_filepath) 26 | h, w, c = img.shape 27 | size = ET.SubElement(root, 'size') 28 | width = ET.SubElement(size, 'width') 29 | width.text = str(w) 30 | height = ET.SubElement(size, 'height') 31 | height.text = str(h) 32 | depth = ET.SubElement(size, 'depth') 33 | depth.text = str(c) 34 | 35 | bboxes = SimpleBBoxLabeling.load_bbox(bbox_filepath) 36 | for obj_name, coord in bboxes: 37 | obj = ET.SubElement(root, 'object') 38 | name = ET.SubElement(obj, 'name') 39 | name.text = obj_name 40 | bndbox = ET.SubElement(obj, 'bndbox') 41 | xmin = ET.SubElement(bndbox, 'xmin') 42 | xmax = ET.SubElement(bndbox, 'xmax') 43 | ymin = ET.SubElement(bndbox, 'ymin') 44 | ymax = ET.SubElement(bndbox, 'ymax') 45 | (left, top), (right, bottom) = coord 46 | xmin.text = str(left) 47 | xmax.text = str(right) 48 | ymin.text = str(top) 49 | ymax.text = str(bottom) 50 | 51 | xml_filepath = jpg_filepath[:jpg_filepath.rfind('.')] + '.xml' 52 | with open(xml_filepath, 'w') as f: 53 | anno_xmlstr = ET.tostring(root) 54 | 55 | # In case a nicely formatted xml is needed 56 | # uncomment the following 2 lines and minidom import 57 | #anno_xml = minidom.parseString(anno_xmlstr) 
58 | #anno_xmlstr = anno_xml.toprettyxml() 59 | f.write(anno_xmlstr) 60 | -------------------------------------------------------------------------------- /chap6/bbox_labeling/samples.labels: -------------------------------------------------------------------------------- 1 | 'Horse', (255, 255, 0) 2 | 'Hill', (0, 255, 255) 3 | 'DiaoSi', (0, 0, 255) 4 | -------------------------------------------------------------------------------- /chap6/data_augmentation/image_augmentation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | crop_image = lambda img, x0, y0, w, h: img[y0:y0+h, x0:x0+w] 5 | 6 | def random_crop(img, area_ratio, hw_vari): 7 | h, w = img.shape[:2] 8 | hw_delta = np.random.uniform(-hw_vari, hw_vari) 9 | hw_mult = 1 + hw_delta 10 | w_crop = int(round(w*np.sqrt(area_ratio*hw_mult))) 11 | if w_crop > w - 2: 12 | w_crop = w - 2 13 | h_crop = int(round(h*np.sqrt(area_ratio/hw_mult))) 14 | if h_crop > h - 2: 15 | h_crop = h - 2 16 | x0 = np.random.randint(0, w-w_crop-1) 17 | y0 = np.random.randint(0, h-h_crop-1) 18 | return crop_image(img, x0, y0, w_crop, h_crop) 19 | 20 | def rotate_image(img, angle, crop): 21 | h, w = img.shape[:2] 22 | angle %= 360 23 | M_rotate = cv2.getRotationMatrix2D((w/2, h/2), angle, 1) 24 | img_rotated = cv2.warpAffine(img, M_rotate, (w, h)) 25 | 26 | if crop: 27 | angle_crop = angle % 180 28 | if angle_crop > 90: 29 | angle_crop = 180 - angle_crop 30 | theta = angle_crop * np.pi / 180.0 31 | hw_ratio = float(h) / float(w) 32 | tan_theta = np.tan(theta) 33 | numerator = np.cos(theta) + np.sin(theta) * tan_theta 34 | r = hw_ratio if h > w else 1 / hw_ratio 35 | denominator = r * tan_theta + 1 36 | crop_mult = numerator / denominator 37 | w_crop = int(round(crop_mult*w)) 38 | h_crop = int(round(crop_mult*h)) 39 | x0 = int((w-w_crop)/2) 40 | y0 = int((h-h_crop)/2) 41 | 42 | img_rotated = crop_image(img_rotated, x0, y0, w_crop, h_crop) 43 | 44 | return img_rotated 45 | 46 | def random_rotate(img, angle_vari, p_crop): 47 | angle = np.random.uniform(-angle_vari, angle_vari) 48 | crop = False if np.random.random() > p_crop else True 49 | return rotate_image(img, angle, crop) 50 | 51 | def hsv_transform(img, hue_delta, sat_mult, val_mult): 52 | img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float) 53 | img_hsv[:, :, 0] = (img_hsv[:, :, 0] + hue_delta) % 180 54 | img_hsv[:, :, 1] *= sat_mult 55 | img_hsv[:, :, 2] *= val_mult 56 | img_hsv[img_hsv > 255] = 255 57 | return cv2.cvtColor(np.round(img_hsv).astype(np.uint8), cv2.COLOR_HSV2BGR) 58 | 59 | def random_hsv_transform(img, hue_vari, sat_vari, val_vari): 60 | hue_delta = np.random.randint(-hue_vari, hue_vari) 61 | sat_mult = 1 + np.random.uniform(-sat_vari, sat_vari) 62 | val_mult = 1 + np.random.uniform(-val_vari, val_vari) 63 | return hsv_transform(img, hue_delta, sat_mult, val_mult) 64 | 65 | def gamma_transform(img, gamma): 66 | gamma_table = [np.power(x / 255.0, gamma) * 255.0 for x in range(256)] 67 | gamma_table = np.round(np.array(gamma_table)).astype(np.uint8) 68 | return cv2.LUT(img, gamma_table) 69 | 70 | def random_gamma_transform(img, gamma_vari): 71 | log_gamma_vari = np.log(gamma_vari) 72 | alpha = np.random.uniform(-log_gamma_vari, log_gamma_vari) 73 | gamma = np.exp(alpha) 74 | return gamma_transform(img, gamma) 75 | 76 | 77 | -------------------------------------------------------------------------------- /chap6/data_augmentation/run_augmentation.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import random 4 | import math 5 | from multiprocessing import Process, cpu_count 6 | 7 | import cv2 8 | 9 | import image_augmentation as ia 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='A Simple Image Data Augmentation Tool', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | 16 | parser.add_argument('input_dir', 17 | help='Directory containing images') 18 | parser.add_argument('output_dir', 19 | help='Directory for augmented images') 20 | parser.add_argument('num', 21 | help='Number of images to be augmented', 22 | type=int) 23 | 24 | parser.add_argument('--num_procs', 25 | help='Number of processes for parallel augmentation', 26 | type=int, default=cpu_count()) 27 | 28 | parser.add_argument('--p_mirror', 29 | help='Ratio to mirror an image', 30 | type=float, default=0.5) 31 | 32 | parser.add_argument('--p_crop', 33 | help='Ratio to randomly crop an image', 34 | type=float, default=1.0) 35 | parser.add_argument('--crop_size', 36 | help='The ratio of cropped image size to original image size, in area', 37 | type=float, default=0.8) 38 | parser.add_argument('--crop_hw_vari', 39 | help='Variation of h/w ratio', 40 | type=float, default=0.1) 41 | 42 | parser.add_argument('--p_rotate', 43 | help='Ratio to randomly rotate an image', 44 | type=float, default=1.0) 45 | parser.add_argument('--p_rotate_crop', 46 | help='Ratio to crop out the empty part in a rotated image', 47 | type=float, default=1.0) 48 | parser.add_argument('--rotate_angle_vari', 49 | help='Variation range of rotate angle', 50 | type=float, default=10.0) 51 | 52 | parser.add_argument('--p_hsv', 53 | help='Ratio to randomly change the HSV of an image', 54 | type=float, default=1.0) 55 | parser.add_argument('--hue_vari', 56 | help='Variation of hue', 57 | type=int, default=10) 58 | parser.add_argument('--sat_vari', 59 | help='Variation of saturation', 60 | type=float, default=0.1) 61 | parser.add_argument('--val_vari', 62 | help='Variation of value', 63 | type=float, default=0.1) 64 | 65 | parser.add_argument('--p_gamma', 66 | help='Ratio to randomly change gamma of an image', 67 | type=float, default=1.0) 68 | parser.add_argument('--gamma_vari', 69 | help='Variation of gamma', 70 | type=float, default=2.0) 71 | 72 | args = parser.parse_args() 73 | args.input_dir = args.input_dir.rstrip('/') 74 | args.output_dir = args.output_dir.rstrip('/') 75 | 76 | return args 77 | 78 | def generate_image_list(args): 79 | filenames = os.listdir(args.input_dir) 80 | num_imgs = len(filenames) 81 | 82 | num_ave_aug = int(math.floor(args.num/num_imgs)) 83 | rem = args.num - num_ave_aug*num_imgs 84 | lucky_seq = [True]*rem + [False]*(num_imgs-rem) 85 | random.shuffle(lucky_seq) 86 | 87 | img_list = [ 88 | (os.sep.join([args.input_dir, filename]), num_ave_aug+1 if lucky else num_ave_aug) 89 | for filename, lucky in zip(filenames, lucky_seq) 90 | ] 91 | 92 | random.shuffle(img_list) # in case the file sizes are not uniformly distributed 93 | 94 | length = float(num_imgs) / float(args.num_procs) 95 | indices = [int(round(i * length)) for i in range(args.num_procs + 1)] 96 | return [img_list[indices[i]:indices[i + 1]] for i in range(args.num_procs)] 97 | 98 | def augment_images(filelist, args): 99 | for filepath, n in filelist: 100 | img = cv2.imread(filepath) 101 | filename = filepath.split(os.sep)[-1] 102 | dot_pos = filename.rfind('.') 103 | imgname = filename[:dot_pos] 104 | ext =
filename[dot_pos:] 105 | 106 | print('Augmenting {} ...'.format(filename)) 107 | for i in range(n): 108 | img_varied = img.copy() 109 | varied_imgname = '{}_{:0>3d}_'.format(imgname, i) 110 | if random.random() < args.p_mirror: 111 | img_varied = cv2.flip(img_varied, 1) 112 | varied_imgname += 'm' 113 | if random.random() < args.p_crop: 114 | img_varied = ia.random_crop( 115 | img_varied, 116 | args.crop_size, 117 | args.crop_hw_vari) 118 | varied_imgname += 'c' 119 | if random.random() < args.p_rotate: 120 | img_varied = ia.random_rotate( 121 | img_varied, 122 | args.rotate_angle_vari, 123 | args.p_rotate_crop) 124 | varied_imgname += 'r' 125 | if random.random() < args.p_hsv: 126 | img_varied = ia.random_hsv_transform( 127 | img_varied, 128 | args.hue_vari, 129 | args.sat_vari, 130 | args.val_vari) 131 | varied_imgname += 'h' 132 | if random.random() < args.p_gamma: 133 | img_varied = ia.random_gamma_transform( 134 | img_varied, 135 | args.gamma_vari) 136 | varied_imgname += 'g' 137 | output_filepath = os.sep.join([ 138 | args.output_dir, 139 | '{}{}'.format(varied_imgname, ext)]) 140 | cv2.imwrite(output_filepath, img_varied) 141 | 142 | def main(): 143 | args = parse_args() 144 | params_str = str(args)[10:-1] 145 | 146 | if not os.path.exists(args.output_dir): 147 | os.mkdir(args.output_dir) 148 | 149 | print('Starting image data augmentation for {}\n' 150 | 'with\n{}\n'.format(args.input_dir, params_str)) 151 | 152 | sublists = generate_image_list(args) 153 | processes = [Process(target=augment_images, args=(x, args, )) for x in sublists] 154 | 155 | for p in processes: 156 | p.start() 157 | 158 | for p in processes: 159 | p.join() 160 | 161 | print('\nDone!') 162 | 163 | if __name__ == '__main__': 164 | main() 165 | -------------------------------------------------------------------------------- /chap6/data_augmentation/run_augmentation_pool_map.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import random 4 | import math 5 | from multiprocessing import cpu_count, Pool 6 | from functools import partial 7 | 8 | import cv2 9 | 10 | import image_augmentation as ia 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser( 14 | description='A Simple Image Data Augmentation Tool', 15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | 17 | parser.add_argument('input_dir', 18 | help='Directory containing images') 19 | parser.add_argument('output_dir', 20 | help='Directory for augmented images') 21 | parser.add_argument('num', 22 | help='Number of images to be augmented', 23 | type=int) 24 | 25 | parser.add_argument('--num_procs', 26 | help='Number of processes for parallel augmentation', 27 | type=int, default=cpu_count()) 28 | 29 | parser.add_argument('--p_mirror', 30 | help='Ratio to mirror an image', 31 | type=float, default=0.5) 32 | 33 | parser.add_argument('--p_crop', 34 | help='Ratio to randomly crop an image', 35 | type=float, default=1.0) 36 | parser.add_argument('--crop_size', 37 | help='The ratio of cropped image size to original image size, in area', 38 | type=float, default=0.8) 39 | parser.add_argument('--crop_hw_vari', 40 | help='Variation of h/w ratio', 41 | type=float, default=0.1) 42 | 43 | parser.add_argument('--p_rotate', 44 | help='Ratio to randomly rotate an image', 45 | type=float, default=1.0) 46 | parser.add_argument('--p_rotate_crop', 47 | help='Ratio to crop out the empty part in a rotated image', 48 | type=float, default=1.0) 49 |
parser.add_argument('--rotate_angle_vari', 50 | help='Variation range of rotate angle', 51 | type=float, default=10.0) 52 | 53 | parser.add_argument('--p_hsv', 54 | help='Ratio to randomly change the HSV of an image', 55 | type=float, default=1.0) 56 | parser.add_argument('--hue_vari', 57 | help='Variation of hue', 58 | type=int, default=10) 59 | parser.add_argument('--sat_vari', 60 | help='Variation of saturation', 61 | type=float, default=0.1) 62 | parser.add_argument('--val_vari', 63 | help='Variation of value', 64 | type=float, default=0.1) 65 | 66 | parser.add_argument('--p_gamma', 67 | help='Ratio to randomly change gamma of an image', 68 | type=float, default=1.0) 69 | parser.add_argument('--gamma_vari', 70 | help='Variation of gamma', 71 | type=float, default=2.0) 72 | 73 | args = parser.parse_args() 74 | args.input_dir = args.input_dir.rstrip('/') 75 | args.output_dir = args.output_dir.rstrip('/') 76 | 77 | return args 78 | 79 | def generate_image_list(args): 80 | filenames = os.listdir(args.input_dir) 81 | num_imgs = len(filenames) 82 | 83 | num_ave_aug = int(math.floor(args.num/num_imgs)) 84 | rem = args.num - num_ave_aug*num_imgs 85 | lucky_seq = [True]*rem + [False]*(num_imgs-rem) 86 | random.shuffle(lucky_seq) 87 | 88 | img_list = [ 89 | (os.sep.join([args.input_dir, filename]), num_ave_aug+1 if lucky else num_ave_aug) 90 | for filename, lucky in zip(filenames, lucky_seq) 91 | ] 92 | 93 | random.shuffle(img_list) # in case the file sizes are not uniformly distributed 94 | return img_list 95 | 96 | def augment_image(image_num_pair, args): 97 | filepath, n = image_num_pair 98 | img = cv2.imread(filepath) 99 | filename = filepath.split(os.sep)[-1] 100 | dot_pos = filename.rfind('.') 101 | imgname = filename[:dot_pos] 102 | ext = filename[dot_pos:] 103 | 104 | print('Augmenting {} ...'.format(filename)) 105 | for i in range(n): 106 | img_varied = img.copy() 107 | varied_imgname = '{}_{:0>3d}_'.format(imgname, i) 108 | if random.random() < args.p_mirror: 109 | img_varied = cv2.flip(img_varied, 1) 110 | varied_imgname += 'm' 111 | if random.random() < args.p_crop: 112 | img_varied = ia.random_crop( 113 | img_varied, 114 | args.crop_size, 115 | args.crop_hw_vari) 116 | varied_imgname += 'c' 117 | if random.random() < args.p_rotate: 118 | img_varied = ia.random_rotate( 119 | img_varied, 120 | args.rotate_angle_vari, 121 | args.p_rotate_crop) 122 | varied_imgname += 'r' 123 | if random.random() < args.p_hsv: 124 | img_varied = ia.random_hsv_transform( 125 | img_varied, 126 | args.hue_vari, 127 | args.sat_vari, 128 | args.val_vari) 129 | varied_imgname += 'h' 130 | if random.random() < args.p_gamma: 131 | img_varied = ia.random_gamma_transform( 132 | img_varied, 133 | args.gamma_vari) 134 | varied_imgname += 'g' 135 | output_filepath = os.sep.join([ 136 | args.output_dir, 137 | '{}{}'.format(varied_imgname, ext)]) 138 | cv2.imwrite(output_filepath, img_varied) 139 | 140 | def main(): 141 | args = parse_args() 142 | params_str = str(args)[10:-1] 143 | 144 | if not os.path.exists(args.output_dir): 145 | os.mkdir(args.output_dir) 146 | 147 | print('Starting image data augmentation for {}\n' 148 | 'with\n{}\n'.format(args.input_dir, params_str)) 149 | 150 | image_list = generate_image_list(args) 151 | aug_img = partial(augment_image, args=args) 152 | pool = Pool(args.num_procs) 153 | pool.map(aug_img, image_list) 154 | 155 | print('\nDone!') 156 | 157 | if __name__ == '__main__': 158 | main() 159 | --------------------------------------------------------------------------------
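Added usage note (not a file from the repo; directory names and the target count below are only illustrative). Both runners above share the same CLI, so a typical invocation looks like:
> python run_augmentation.py raw_imgs aug_imgs 10000 --rotate_angle_vari 15
> python run_augmentation_pool_map.py raw_imgs aug_imgs 10000 --num_procs 4
Each reads every image under raw_imgs, writes roughly 10000 augmented copies into aug_imgs, and encodes the applied transforms in each output file name (m/c/r/h/g for mirror/crop/rotate/HSV/gamma).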
/chap7/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare Data 2 | Run *gen_data.py* to generate random data and dump it to a file with pickle 3 | 4 | ## MXNet 5 | Run *simple_mlp.py* in the mxnet folder to train the model and visualize the results 6 | 7 | ## Caffe 8 | ### step 1 9 | Run *gen_hdf5.py* in the caffe folder to convert the data to HDF5 format 10 | ### step 2 11 | Run *simple_mlp_train.py* to train the model 12 | ### step 3 13 | Run *simple_mlp_test.py* to test the model and visualize the results -------------------------------------------------------------------------------- /chap7/caffe/gen_hdf5.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import h5py 4 | 5 | with open('../data.pkl', 'rb') as f: 6 | samples, labels = pickle.load(f) 7 | sample_size = len(labels) 8 | 9 | samples = np.array(samples).reshape((sample_size, 2)) 10 | labels = np.array(labels).reshape((sample_size, 1)) 11 | 12 | h5_filename = 'data.h5' 13 | with h5py.File(h5_filename, 'w') as h: 14 | h.create_dataset('data', data=samples) 15 | h.create_dataset('label', data=labels) 16 | 17 | with open('data_h5.txt', 'w') as f: 18 | f.write(h5_filename) 19 | -------------------------------------------------------------------------------- /chap7/caffe/simple_mlp_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pickle 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from mpl_toolkits.mplot3d import Axes3D 6 | sys.path.append('/path/to/caffe/python') 7 | import caffe 8 | 9 | net = caffe.Net('test.prototxt', 'simple_mlp_iter_2000.caffemodel', caffe.TEST) 10 | 11 | # load original data 12 | with open('../data.pkl', 'rb') as f: 13 | samples, labels = pickle.load(f) 14 | samples = np.array(samples) 15 | labels = np.array(labels) 16 | 17 | # Visualize result 18 | X = np.arange(0, 1.05, 0.05) 19 | Y = np.arange(0, 1.05, 0.05) 20 | X, Y = np.meshgrid(X, Y) 21 | 22 | # Plot the surface of probability 23 | grids = np.array([[X[i][j], Y[i][j]] for i in range(X.shape[0]) for j in range(X.shape[1])]) 24 | grid_probs = [] 25 | for grid in grids: 26 | net.blobs['data'].data[...] = grid.reshape((1, 2))[...] 27 | output = net.forward() 28 | grid_probs.append(output['prob'][0][1]) 29 | 30 | grid_probs = np.array(grid_probs).reshape(X.shape) 31 | 32 | fig = plt.figure('Sample Surface') 33 | ax = fig.gca(projection='3d') 34 | 35 | ax.plot_surface(X, Y, grid_probs, alpha=0.15, color='k', rstride=2, cstride=2, lw=0.5) 36 | 37 | # Plot the predicted probability of samples 38 | samples0 = samples[labels==0] 39 | samples0_probs = [] 40 | for sample in samples0: 41 | net.blobs['data'].data[...] = sample.reshape((1, 2))[...] 42 | output = net.forward() 43 | samples0_probs.append(output['prob'][0][1]) 44 | 45 | samples1 = samples[labels==1] 46 | samples1_probs = [] 47 | for sample in samples1: 48 | net.blobs['data'].data[...] = sample.reshape((1, 2))[...]
49 | output = net.forward() 50 | samples1_probs.append(output['prob'][0][1]) 51 | 52 | ax.scatter(samples0[:, 0], samples0[:, 1], samples0_probs, c='b', marker='^', s=50) 53 | ax.scatter(samples1[:, 0], samples1[:, 1], samples1_probs, c='r', marker='o', s=50) 54 | 55 | plt.show() 56 | -------------------------------------------------------------------------------- /chap7/caffe/simple_mlp_train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | sys.path.append('/path/to/caffe/python') 5 | import caffe 6 | 7 | solver = caffe.SGDSolver('solver.prototxt') 8 | solver.solve() 9 | 10 | net = solver.net 11 | net.blobs['data'].data[...] = np.array([[0.5, 0.5]]) 12 | output = net.forward() 13 | print(output) 14 | -------------------------------------------------------------------------------- /chap7/caffe/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "train.prototxt" 2 | base_lr: 0.15 3 | lr_policy: "fixed" 4 | display: 100 5 | max_iter: 2000 6 | momentum: 0.95 7 | snapshot_prefix: "simple_mlp" 8 | solver_mode: CPU -------------------------------------------------------------------------------- /chap7/caffe/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "SimpleMLP" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 2 6 | } 7 | layer { 8 | name: "fc1" 9 | type: "InnerProduct" 10 | bottom: "data" 11 | top: "fc1" 12 | inner_product_param { 13 | num_output: 2 14 | } 15 | } 16 | layer { 17 | name: "sigmoid1" 18 | type: "Sigmoid" 19 | bottom: "fc1" 20 | top: "sigmoid1" 21 | } 22 | layer { 23 | name: "fc2" 24 | type: "InnerProduct" 25 | bottom: "sigmoid1" 26 | top: "fc2" 27 | inner_product_param { 28 | num_output: 2 29 | } 30 | } 31 | layer { 32 | name: "softmax" 33 | type: "Softmax" 34 | bottom: "fc2" 35 | top: "prob" 36 | } 37 | -------------------------------------------------------------------------------- /chap7/caffe/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "SimpleMLP" 2 | layer { 3 | name: "data" 4 | type: "HDF5Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | hdf5_data_param { 11 | source: "data_h5.txt" 12 | batch_size: 41 13 | } 14 | } 15 | layer { 16 | name: "fc1" 17 | type: "InnerProduct" 18 | bottom: "data" 19 | top: "fc1" 20 | inner_product_param { 21 | num_output: 2 22 | weight_filler { 23 | type: "uniform" 24 | } 25 | } 26 | } 27 | layer { 28 | name: "sigmoid1" 29 | type: "Sigmoid" 30 | bottom: "fc1" 31 | top: "sigmoid1" 32 | } 33 | layer { 34 | name: "fc2" 35 | type: "InnerProduct" 36 | bottom: "sigmoid1" 37 | top: "fc2" 38 | inner_product_param { 39 | num_output: 2 40 | weight_filler { 41 | type: "uniform" 42 | } 43 | } 44 | } 45 | layer { 46 | name: "loss" 47 | type: "SoftmaxWithLoss" 48 | bottom: "fc2" 49 | bottom: "label" 50 | top: "loss" 51 | } 52 | -------------------------------------------------------------------------------- /chap7/gen_data.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def cos_curve(x): 7 | return 0.25*np.sin(2*x*np.pi+0.5*np.pi) + 0.5 8 | 9 | np.random.seed(123) 10 | samples = [] 11 | labels = [] 12 | 13 | sample_density = 50 14 | for i in range(sample_density): 15 | x1, x2 = np.random.random(2) 16 | bound = cos_curve(x1) 17 | if bound - 0.1 < x2 <= bound + 0.1: 18 | continue 19 | else: 20 | samples.append((x1, x2)) 21 | if x2 > bound: 22 | labels.append(1) 23 | else: 24 | labels.append(0) 25 | 26 | with open('data.pkl', 'wb') as f: 27 | pickle.dump((samples, labels), f) 28 | 29 | for i, sample in enumerate(samples): 30 | plt.plot(sample[0], sample[1], 31 | 'o' if labels[i] else '^', 32 | mec='r' if labels[i] else 'b', 33 | mfc='none', 34 | markersize=10) 35 | 36 | x1 = np.linspace(0, 1) 37 | plt.plot(x1, cos_curve(x1), 'k--') 38 | plt.show() 39 | --------------------------------------------------------------------------------
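Added sketch (not part of the repo): a minimal sanity check for the data.pkl written by gen_data.py above, run from the same directory; it only assumes the (samples, labels) layout used throughout chap7.
import pickle
with open('data.pkl', 'rb') as f:
    samples, labels = pickle.load(f)
# labels are 0/1, so summing them counts the positive class
print('{} samples, {} labeled 1, {} labeled 0'.format(
    len(samples), sum(labels), len(labels) - sum(labels)))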
/chap7/mxnet/simple_mlp.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import logging 3 | import numpy as np 4 | import mxnet as mx 5 | import matplotlib.pyplot as plt 6 | from mpl_toolkits.mplot3d import Axes3D 7 | 8 | # Define the network 9 | data = mx.sym.Variable('data') 10 | fc1 = mx.sym.FullyConnected(data=data, name='fc1', num_hidden=2) 11 | sigmoid1 = mx.sym.Activation(data=fc1, name='sigmoid1', act_type='sigmoid') 12 | fc2 = mx.sym.FullyConnected(data=sigmoid1, name='fc2', num_hidden=2) 13 | mlp = mx.sym.SoftmaxOutput(data=fc2, name='softmax') 14 | 15 | shape = {'data': (2,)} 16 | mlp_dot = mx.viz.plot_network(symbol=mlp, shape=shape) 17 | mlp_dot.render('simple_mlp.gv', view=True) 18 | 19 | # Load data & train the model 20 | with open('../data.pkl', 'rb') as f: 21 | samples, labels = pickle.load(f) 22 | 23 | logging.getLogger().setLevel(logging.DEBUG) 24 | 25 | batch_size = len(labels) 26 | samples = np.array(samples) 27 | labels = np.array(labels) 28 | 29 | train_iter = mx.io.NDArrayIter(samples, labels, batch_size) 30 | 31 | model = mx.model.FeedForward.create( 32 | symbol=mlp, 33 | X=train_iter, 34 | num_epoch=1000, 35 | learning_rate=0.1, 36 | momentum=0.99) 37 | 38 | ''' 39 | # Alternative interface to train the model 40 | model = mx.model.FeedForward( 41 | symbol=mlp, 42 | num_epoch=1000, 43 | learning_rate=0.1, 44 | momentum=0.99) 45 | model.fit(X=train_iter) 46 | ''' 47 | 48 | print(model.predict(mx.nd.array([[0.5, 0.5]]))) 49 | 50 | # Visualize result 51 | X = np.arange(0, 1.05, 0.05) 52 | Y = np.arange(0, 1.05, 0.05) 53 | X, Y = np.meshgrid(X, Y) 54 | 55 | grids = mx.nd.array([[X[i][j], Y[i][j]] for i in range(X.shape[0]) for j in range(X.shape[1])]) 56 | grid_probs = model.predict(grids)[:, 1].reshape(X.shape) 57 | 58 | fig = plt.figure('Sample Surface') 59 | ax = fig.gca(projection='3d') 60 | 61 | ax.plot_surface(X, Y, grid_probs, alpha=0.15, color='k', rstride=2, cstride=2, lw=0.5) 62 | samples0 = samples[labels==0] 63 | samples0_probs = model.predict(samples0)[:, 1] 64 | samples1 = samples[labels==1] 65 | samples1_probs = model.predict(samples1)[:, 1] 66 | 67 | ax.scatter(samples0[:, 0], samples0[:, 1], samples0_probs, c='b', marker='^', s=50) 68 | ax.scatter(samples1[:, 0], samples1[:, 1], samples1_probs, c='r', marker='o', s=50) 69 | 70 | plt.show() 71 | -------------------------------------------------------------------------------- /chap8/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare Data 2 | ### step 1 3 | > ./download_mnist.sh 4 | 5 | Download mnist.pkl.gz, then run *convert_mnist.py* to convert the pickled data into images 6 | 7 | If the original link is unavailable, the file can also be downloaded from this cloud drive: 8 | http://pan.baidu.com/s/1bHmm7s 9 | 10 | ### step 2 11 | > python gen_caffe_imglist.py mnist/train train.txt 12 | 13 | > python gen_caffe_imglist.py mnist/val val.txt 14 | 15 | > python gen_caffe_imglist.py mnist/test test.txt 16 | 17 | This produces the image lists; then run: 18 | > /path/to/caffe/build/tools/convert_imageset ./ train.txt train_lmdb --gray --shuffle 19 | 20 | > /path/to/caffe/build/tools/convert_imageset ./ val.txt val_lmdb --gray --shuffle 21 | 22 | > /path/to/caffe/build/tools/convert_imageset ./ test.txt test_lmdb --gray --shuffle 23 | 24 | 25 | to generate the LMDBs 26 | 27 | ### step 3 28 | > python gen_mxnet_imglist.py mnist/train train.lst 29 | 30 | > python gen_mxnet_imglist.py mnist/val val.lst 31 | 32 | > python gen_mxnet_imglist.py mnist/test test.lst 33 | 34 | to produce the image lists, then run 35 | 36 | > /path/to/mxnet/bin/im2rec train.lst ./ train.rec color=0 37 | 38 | > /path/to/mxnet/bin/im2rec val.lst ./ val.rec color=0 39 | 40 | > /path/to/mxnet/bin/im2rec test.lst ./ test.rec color=0 41 | 42 | to generate the ImageRecordIO files 43 | 44 | 45 | ## MXNet 46 | 47 | Run *train_lenet5.py* to train the model 48 | 49 | Run *score_model.py* to evaluate the model on the test set 50 | 51 | Run *benchmark_model.py* to benchmark the model's forward speed 52 | 53 | Run *recognize_digit.py* with a path to images as the argument to recognize handwritten digits 54 | 55 | ## Caffe 56 | *lenet_train_val.prototxt* & *lenet_train_val_aug.prototxt* are the network and data definition files for training on the original data and the augmented data, respectively 57 | 58 | *lenet_solver.prototxt* & *lenet_solver_aug.prototxt* are the solver files for the original data and the augmented data, respectively 59 | 60 | *lenet_test.prototxt* is the network and data-source definition file for testing the model on the test data 61 | 62 | *lenet.prototxt* is the network definition file for deployment 63 | 64 | Run *recognize_digit.py* with a list file of test images to demo handwritten digit recognition --------------------------------------------------------------------------------
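Added note (not in the repo): every transform_param in the prototxt files below, and the MEAN/SCALE constants in the recognize_digit.py scripts, apply (pixel - 128) * 0.00390625; since 0.00390625 = 1/256, inputs are mapped to roughly [-0.5, 0.5). A one-line check:
MEAN, SCALE = 128, 0.00390625
assert SCALE == 1.0 / 256
print('{} {}'.format((0 - MEAN) * SCALE, (255 - MEAN) * SCALE))  # -0.5 0.49609375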
/chap8/caffe/lenet.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { 7 | shape: { 8 | dim: 64 9 | dim: 1 10 | dim: 28 11 | dim: 28 12 | } 13 | } 14 | } 15 | layer { 16 | name: "conv1" 17 | type: "Convolution" 18 | bottom: "data" 19 | top: "conv1" 20 | param { 21 | lr_mult: 1 22 | } 23 | param { 24 | lr_mult: 2 25 | } 26 | convolution_param { 27 | num_output: 20 28 | kernel_size: 5 29 | stride: 1 30 | weight_filler { 31 | type: "xavier" 32 | } 33 | bias_filler { 34 | type: "constant" 35 | } 36 | } 37 | } 38 | layer { 39 | name: "pool1" 40 | type: "Pooling" 41 | bottom: "conv1" 42 | top: "pool1" 43 | pooling_param { 44 | pool: MAX 45 | kernel_size: 2 46 | stride: 2 47 | } 48 | } 49 | layer { 50 | name: "conv2" 51 | type: "Convolution" 52 | bottom: "pool1" 53 | top: "conv2" 54 | param { 55 | lr_mult: 1 56 | } 57 | param { 58 | lr_mult: 2 59 | } 60 | convolution_param { 61 | num_output: 50 62 | kernel_size: 5 63 | stride: 1 64 | weight_filler { 65 | type: "xavier" 66 | } 67 | bias_filler { 68 | type: "constant" 69 | } 70 | } 71 | } 72 | layer { 73 | name: "pool2" 74 | type: "Pooling" 75 | bottom: "conv2" 76 | top: "pool2" 77 | pooling_param { 78 | pool: MAX 79 | kernel_size: 2 80 | stride: 2 81 | } 82 | } 83 | layer { 84 | name: "ip1" 85 | type: "InnerProduct" 86 | bottom: "pool2" 87 | top: "ip1" 88 | param { 89 | lr_mult: 1 90 | } 91 | param { 92 | lr_mult: 2 93 | } 94 | inner_product_param { 95 | num_output: 500 96 | weight_filler { 97 | type: "xavier" 98 | } 99 | bias_filler { 100 | type: "constant" 101 | } 102 | } 103 | } 104 | layer { 105 | name: "relu1" 106 | type: "ReLU" 107 | bottom: "ip1" 108 | top: "ip1" 109 | } 110 | layer { 111 | name: "ip2" 112 | type: "InnerProduct" 113 | bottom: "ip1" 114 | top: "ip2" 115 | param { 116 | lr_mult: 1 117 | } 118 | param { 119 | lr_mult: 2 120 | } 121 | inner_product_param { 122 | num_output: 10 123 | weight_filler { 124 | type: "xavier" 125 | } 126 | bias_filler { 127 | type: "constant" 128 | } 129 | } 130 | } 131 | layer { 132 | name:
"prob" 133 | type: "Softmax" 134 | bottom: "ip2" 135 | top: "prob" 136 | } 137 | 138 | -------------------------------------------------------------------------------- /chap8/caffe/lenet_solver.prototxt: -------------------------------------------------------------------------------- 1 | # The train/validate net protocol buffer definition 2 | net: "lenet_train_val.prototxt" 3 | # test_iter specifies how many forward passes the test should carry out. 4 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 5 | # covering the full 10,000 testing images. 6 | test_iter: 100 7 | # Carry out testing every 500 training iterations. 8 | test_interval: 500 9 | # The base learning rate, momentum and the weight decay of the network. 10 | base_lr: 0.01 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | # The learning rate policy 14 | lr_policy: "inv" 15 | gamma: 0.0001 16 | power: 0.75 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 36000 21 | # snapshot intermediate results 22 | snapshot: 5000 23 | snapshot_prefix: "mnist_lenet" 24 | # solver mode: CPU or GPU 25 | solver_mode: GPU 26 | -------------------------------------------------------------------------------- /chap8/caffe/lenet_solver_aug.prototxt: -------------------------------------------------------------------------------- 1 | # The train/validate net protocol buffer definition 2 | net: "lenet_train_val_aug.prototxt" 3 | # test_iter specifies how many forward passes the test should carry out. 4 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 5 | # covering the full 10,000 testing images. 6 | test_iter: 100 7 | # Carry out testing every 500 training iterations. 8 | test_interval: 500 9 | # The base learning rate, momentum and the weight decay of the network. 
10 | base_lr: 0.01 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | # The learning rate policy 14 | lr_policy: "inv" 15 | gamma: 0.0001 16 | power: 0.75 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 36000 21 | #max_iter: 120000 22 | # snapshot intermediate results 23 | snapshot: 5000 24 | snapshot_prefix: "mnist_aug_lenet" 25 | # solver mode: CPU or GPU 26 | solver_mode: GPU 27 | -------------------------------------------------------------------------------- /chap8/caffe/lenet_test.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet Test" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TEST 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | data_param { 15 | source: "../data/test_lmdb" 16 | batch_size: 100 17 | backend: LMDB 18 | } 19 | } 20 | layer { 21 | name: "conv1" 22 | type: "Convolution" 23 | bottom: "data" 24 | top: "conv1" 25 | param { 26 | lr_mult: 1 27 | } 28 | param { 29 | lr_mult: 2 30 | } 31 | convolution_param { 32 | num_output: 20 33 | kernel_size: 5 34 | stride: 1 35 | weight_filler { 36 | type: "xavier" 37 | } 38 | bias_filler { 39 | type: "constant" 40 | } 41 | } 42 | } 43 | layer { 44 | name: "pool1" 45 | type: "Pooling" 46 | bottom: "conv1" 47 | top: "pool1" 48 | pooling_param { 49 | pool: MAX 50 | kernel_size: 2 51 | stride: 2 52 | } 53 | } 54 | layer { 55 | name: "conv2" 56 | type: "Convolution" 57 | bottom: "pool1" 58 | top: "conv2" 59 | param { 60 | lr_mult: 1 61 | } 62 | param { 63 | lr_mult: 2 64 | } 65 | convolution_param { 66 | num_output: 50 67 | kernel_size: 5 68 | stride: 1 69 | weight_filler { 70 | type: "xavier" 71 | } 72 | bias_filler { 73 | type: "constant" 74 | } 75 | } 76 | } 77 | layer { 78 | name: "pool2" 79 | type: "Pooling" 80 | bottom: "conv2" 81 | top: "pool2" 82 | pooling_param { 83 | pool: MAX 84 | kernel_size: 2 85 | stride: 2 86 | } 87 | } 88 | layer { 89 | name: "ip1" 90 | type: "InnerProduct" 91 | bottom: "pool2" 92 | top: "ip1" 93 | param { 94 | lr_mult: 1 95 | } 96 | param { 97 | lr_mult: 2 98 | } 99 | inner_product_param { 100 | num_output: 500 101 | weight_filler { 102 | type: "xavier" 103 | } 104 | bias_filler { 105 | type: "constant" 106 | } 107 | } 108 | } 109 | layer { 110 | name: "relu1" 111 | type: "ReLU" 112 | bottom: "ip1" 113 | top: "ip1" 114 | } 115 | layer { 116 | name: "ip2" 117 | type: "InnerProduct" 118 | bottom: "ip1" 119 | top: "ip2" 120 | param { 121 | lr_mult: 1 122 | } 123 | param { 124 | lr_mult: 2 125 | } 126 | inner_product_param { 127 | num_output: 10 128 | weight_filler { 129 | type: "xavier" 130 | } 131 | bias_filler { 132 | type: "constant" 133 | } 134 | } 135 | } 136 | layer { 137 | name: "accuracy" 138 | type: "Accuracy" 139 | bottom: "ip2" 140 | bottom: "label" 141 | top: "accuracy" 142 | include { 143 | phase: TEST 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /chap8/caffe/lenet_train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | data_param { 15 | source: "../data/train_lmdb" 16 | batch_size: 50 17 | backend: LMDB 18 | } 19 | } 20 | layer { 21 | name: "mnist" 22 | type: "Data" 23 | 
top: "data" 24 | top: "label" 25 | include { 26 | phase: TEST 27 | } 28 | transform_param { 29 | mean_value: 128 30 | scale: 0.00390625 31 | } 32 | data_param { 33 | source: "../data/val_lmdb" 34 | batch_size: 100 35 | backend: LMDB 36 | } 37 | } 38 | layer { 39 | name: "conv1" 40 | type: "Convolution" 41 | bottom: "data" 42 | top: "conv1" 43 | param { 44 | lr_mult: 1 45 | } 46 | param { 47 | lr_mult: 2 48 | } 49 | convolution_param { 50 | num_output: 20 51 | kernel_size: 5 52 | stride: 1 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 2 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "conv2" 74 | type: "Convolution" 75 | bottom: "pool1" 76 | top: "conv2" 77 | param { 78 | lr_mult: 1 79 | } 80 | param { 81 | lr_mult: 2 82 | } 83 | convolution_param { 84 | num_output: 50 85 | kernel_size: 5 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "conv2" 99 | top: "pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "ip1" 108 | type: "InnerProduct" 109 | bottom: "pool2" 110 | top: "ip1" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | inner_product_param { 118 | num_output: 500 119 | weight_filler { 120 | type: "xavier" 121 | } 122 | bias_filler { 123 | type: "constant" 124 | } 125 | } 126 | } 127 | layer { 128 | name: "relu1" 129 | type: "ReLU" 130 | bottom: "ip1" 131 | top: "ip1" 132 | } 133 | layer { 134 | name: "ip2" 135 | type: "InnerProduct" 136 | bottom: "ip1" 137 | top: "ip2" 138 | param { 139 | lr_mult: 1 140 | } 141 | param { 142 | lr_mult: 2 143 | } 144 | inner_product_param { 145 | num_output: 10 146 | weight_filler { 147 | type: "xavier" 148 | } 149 | bias_filler { 150 | type: "constant" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "accuracy" 156 | type: "Accuracy" 157 | bottom: "ip2" 158 | bottom: "label" 159 | top: "accuracy" 160 | include { 161 | phase: TEST 162 | } 163 | } 164 | layer { 165 | name: "loss" 166 | type: "SoftmaxWithLoss" 167 | bottom: "ip2" 168 | bottom: "label" 169 | top: "loss" 170 | } 171 | -------------------------------------------------------------------------------- /chap8/caffe/lenet_train_val_aug.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | data_param { 15 | source: "../data/train_aug_lmdb" 16 | batch_size: 50 17 | backend: LMDB 18 | } 19 | } 20 | layer { 21 | name: "mnist" 22 | type: "Data" 23 | top: "data" 24 | top: "label" 25 | include { 26 | phase: TEST 27 | } 28 | transform_param { 29 | mean_value: 128 30 | scale: 0.00390625 31 | } 32 | data_param { 33 | source: "../data/val_lmdb" 34 | batch_size: 100 35 | backend: LMDB 36 | } 37 | } 38 | layer { 39 | name: "conv1" 40 | type: "Convolution" 41 | bottom: "data" 42 | top: "conv1" 43 | param { 44 | lr_mult: 1 45 | } 46 | param { 47 | lr_mult: 2 48 | } 49 | convolution_param { 50 | num_output: 20 51 | kernel_size: 5 52 | stride: 1 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 
57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 2 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "conv2" 74 | type: "Convolution" 75 | bottom: "pool1" 76 | top: "conv2" 77 | param { 78 | lr_mult: 1 79 | } 80 | param { 81 | lr_mult: 2 82 | } 83 | convolution_param { 84 | num_output: 50 85 | kernel_size: 5 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "conv2" 99 | top: "pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "ip1" 108 | type: "InnerProduct" 109 | bottom: "pool2" 110 | top: "ip1" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | inner_product_param { 118 | num_output: 500 119 | weight_filler { 120 | type: "xavier" 121 | } 122 | bias_filler { 123 | type: "constant" 124 | } 125 | } 126 | } 127 | layer { 128 | name: "relu1" 129 | type: "ReLU" 130 | bottom: "ip1" 131 | top: "ip1" 132 | } 133 | layer { 134 | name: "ip2" 135 | type: "InnerProduct" 136 | bottom: "ip1" 137 | top: "ip2" 138 | param { 139 | lr_mult: 1 140 | } 141 | param { 142 | lr_mult: 2 143 | } 144 | inner_product_param { 145 | num_output: 10 146 | weight_filler { 147 | type: "xavier" 148 | } 149 | bias_filler { 150 | type: "constant" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "accuracy" 156 | type: "Accuracy" 157 | bottom: "ip2" 158 | bottom: "label" 159 | top: "accuracy" 160 | include { 161 | phase: TEST 162 | } 163 | } 164 | layer { 165 | name: "loss" 166 | type: "SoftmaxWithLoss" 167 | bottom: "ip2" 168 | bottom: "label" 169 | top: "loss" 170 | } 171 | -------------------------------------------------------------------------------- /chap8/caffe/recognize_digit.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/path/to/caffe/python') 3 | import numpy as np 4 | import cv2 5 | import caffe 6 | 7 | MEAN = 128 8 | SCALE = 0.00390625 9 | 10 | imglist = sys.argv[1] 11 | 12 | caffe.set_mode_gpu() 13 | caffe.set_device(0) 14 | net = caffe.Net('lenet.prototxt', 'mnist_lenet_iter_36000.caffemodel', caffe.TEST) 15 | net.blobs['data'].reshape(1, 1, 28, 28) 16 | 17 | with open(imglist, 'r') as f: 18 | line = f.readline() 19 | while line: 20 | imgpath, label = line.split() 21 | line = f.readline() 22 | image = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE).astype(np.float) - MEAN 23 | image *= SCALE 24 | net.blobs['data'].data[...] 
= image 25 | output = net.forward() 26 | pred_label = np.argmax(output['prob'][0]) 27 | print('Predicted digit for {} is {}'.format(imgpath, pred_label)) 28 | -------------------------------------------------------------------------------- /chap8/data/convert_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle, gzip 3 | from matplotlib import pyplot 4 | 5 | # Load the dataset 6 | print('Loading data from mnist.pkl.gz ...') 7 | with gzip.open('mnist.pkl.gz', 'rb') as f: 8 | train_set, valid_set, test_set = pickle.load(f) 9 | 10 | imgs_dir = 'mnist' 11 | os.system('mkdir -p {}'.format(imgs_dir)) 12 | datasets = {'train': train_set, 'val': valid_set, 'test': test_set} 13 | for dataname, dataset in datasets.items(): 14 | print('Converting {} dataset ...'.format(dataname)) 15 | data_dir = os.sep.join([imgs_dir, dataname]) 16 | os.system('mkdir -p {}'.format(data_dir)) 17 | for i, (img, label) in enumerate(zip(*dataset)): 18 | filename = '{:0>6d}_{}.jpg'.format(i, label) 19 | filepath = os.sep.join([data_dir, filename]) 20 | img = img.reshape((28, 28)) 21 | pyplot.imsave(filepath, img, cmap='gray') 22 | if (i+1) % 10000 == 0: 23 | print('{} images converted!'.format(i+1)) 24 | 25 | -------------------------------------------------------------------------------- /chap8/data/download_mnist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # wget http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz 3 | wget http://deeplearning.net/data/mnist/mnist.pkl.gz 4 | -------------------------------------------------------------------------------- /chap8/data/gen_caffe_imglist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | input_path = sys.argv[1].rstrip(os.sep) 5 | output_path = sys.argv[2] 6 | 7 | filenames = os.listdir(input_path) 8 | 9 | with open(output_path, 'w') as f: 10 | for filename in filenames: 11 | filepath = os.sep.join([input_path, filename]) 12 | label = filename[:filename.rfind('.')].split('_')[1] 13 | line = '{} {}\n'.format(filepath, label) 14 | f.write(line) 15 | 16 | -------------------------------------------------------------------------------- /chap8/data/gen_mxnet_imglist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | input_path = sys.argv[1].rstrip(os.sep) 5 | output_path = sys.argv[2] 6 | 7 | filenames = os.listdir(input_path) 8 | 9 | with open(output_path, 'w') as f: 10 | for i, filename in enumerate(filenames): 11 | filepath = os.sep.join([input_path, filename]) 12 | label = filename[:filename.rfind('.')].split('_')[1] 13 | line = '{}\t{}\t{}\n'.format(i, label, filepath) 14 | f.write(line) 15 | 16 | -------------------------------------------------------------------------------- /chap8/mxnet/benchmark_model.py: -------------------------------------------------------------------------------- 1 | import time 2 | import mxnet as mx 3 | 4 | benchmark_dataiter = mx.io.ImageRecordIter( 5 | path_imgrec="../data/test.rec", 6 | data_shape=(1, 28, 28), 7 | batch_size=64, 8 | mean_r=128, 9 | scale=0.00390625, 10 | ) 11 | 12 | mod = mx.mod.Module.load('mnist_lenet', 35, context=mx.gpu(2)) 13 | mod.bind( 14 | data_shapes=benchmark_dataiter.provide_data, 15 | label_shapes=benchmark_dataiter.provide_label, 16 | for_training=False) 17 | 18 | start = time.time() 19 | for i, batch in enumerate(benchmark_dataiter): 20 
| mod.forward(batch) 21 | time_elapsed = time.time() - start 22 | msg = '{} batches iterated!\nAverage forward time per batch: {:.6f} ms' 23 | print(msg.format(i+1, 1000*time_elapsed/float(i+1))) 24 | -------------------------------------------------------------------------------- /chap8/mxnet/recognize_digit.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import cv2 4 | from collections import namedtuple 5 | Batch = namedtuple('Batch', ['data']) 6 | import numpy as np 7 | import mxnet as mx 8 | 9 | input_path = sys.argv[1].rstrip(os.sep) 10 | 11 | mod = mx.mod.Module.load('mnist_lenet', 35, context=mx.gpu(2)) 12 | mod.bind( 13 | data_shapes=[('data', (1, 1, 28, 28))], 14 | for_training=False) 15 | 16 | filenames = os.listdir(input_path) 17 | for filename in filenames: 18 | filepath = os.sep.join([input_path, filename]) 19 | img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE) 20 | img = (img.astype(np.float)-128) * 0.00390625 21 | img = img.reshape((1, 1)+img.shape) 22 | mod.forward(Batch([mx.nd.array(img)])) 23 | prob = mod.get_outputs()[0].asnumpy() 24 | prob = np.squeeze(prob) 25 | pred_label = np.argmax(prob) 26 | print('Predicted digit for {} is {}'.format(filepath, pred_label)) 27 | -------------------------------------------------------------------------------- /chap8/mxnet/score_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | test_dataiter = mx.io.ImageRecordIter( 4 | path_imgrec="../data/test.rec", 5 | data_shape=(1, 28, 28), 6 | batch_size=100, 7 | mean_r=128, 8 | scale=0.00390625, 9 | ) 10 | 11 | mod = mx.mod.Module.load('mnist_lenet', 35, context=mx.gpu(2)) 12 | mod.bind( 13 | data_shapes=test_dataiter.provide_data, 14 | label_shapes=test_dataiter.provide_label, 15 | for_training=False) 16 | 17 | ''' 18 | # in case we need to continue to train from epoch 35 19 | mod.fit(..., 20 | arg_params=arg_params, 21 | aux_params=aux_params, 22 | begin_epoch=35) 23 | ''' 24 | 25 | metric = mx.metric.create('acc') 26 | mod.score(test_dataiter, metric) 27 | 28 | for name, val in metric.get_name_value(): 29 | print('{}={:.2f}%'.format(name, val*100)) 30 | -------------------------------------------------------------------------------- /chap8/mxnet/train_lenet5.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import logging 3 | 4 | # data & preprocessing 5 | data = mx.symbol.Variable('data') 6 | 7 | # 1st conv 8 | conv1 = mx.symbol.Convolution(data=data, kernel=(5, 5), num_filter=20) 9 | pool1 = mx.symbol.Pooling(data=conv1, pool_type="max", 10 | kernel=(2, 2), stride=(2, 2)) 11 | # 2nd conv 12 | conv2 = mx.symbol.Convolution(data=pool1, kernel=(5, 5), num_filter=50) 13 | pool2 = mx.symbol.Pooling(data=conv2, pool_type="max", 14 | kernel=(2, 2), stride=(2, 2)) 15 | # 1st fc & relu 16 | flatten = mx.symbol.Flatten(data=pool2) 17 | fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=500) 18 | relu1 = mx.symbol.Activation(data=fc1, act_type="relu") 19 | 20 | # 2nd fc 21 | fc2 = mx.symbol.FullyConnected(data=relu1, num_hidden=10) 22 | # loss 23 | lenet5 = mx.symbol.SoftmaxOutput(data=fc2, name='softmax') 24 | 25 | train_dataiter = mx.io.ImageRecordIter( 26 | path_imgrec="../data/train.rec", 27 | data_shape=(1, 28, 28), 28 | batch_size=50, 29 | mean_r=128, 30 | scale=0.00390625, 31 | rand_crop=True, 32 | min_crop_size=26, 33 | max_crop_size=28, 34 | max_rotate_angle=15, 35 | fill_value=0 36 | ) 37 | 38 | val_dataiter = mx.io.ImageRecordIter( 39 | path_imgrec="../data/val.rec", 40 | data_shape=(1, 28, 28), 41 | batch_size=100, 42 | mean_r=128, 43 | scale=0.00390625, 44 | ) 45 | 46 | logging.getLogger().setLevel(logging.DEBUG) 47 | fh = logging.FileHandler('train_mnist_lenet.log') 48 | logging.getLogger().addHandler(fh) 49 | 50 | lr_scheduler = mx.lr_scheduler.FactorScheduler(1000, factor=0.95) 51 | optimizer_params = { 52 | 'learning_rate': 0.01, 53 | 'momentum': 0.9, 54 | 'wd': 0.0005, 55 | 'lr_scheduler': lr_scheduler 56 | } 57 | checkpoint = mx.callback.do_checkpoint('mnist_lenet', period=5) 58 | 59 | mod = mx.mod.Module(lenet5, context=mx.gpu(2)) 60 | mod.fit(train_dataiter, 61 | eval_data=val_dataiter, 62 | optimizer_params=optimizer_params, 63 | num_epoch=36, 64 | epoch_end_callback=checkpoint) 65 | --------------------------------------------------------------------------------
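Added note (not in the repo): the FactorScheduler(1000, factor=0.95) in train_lenet5.py above multiplies the learning rate by 0.95 every 1000 updates, so after k updates it is 0.01 * 0.95 ** (k // 1000). A quick sketch of the decay:
base_lr = 0.01
for k in (0, 1000, 10000, 35000):
    # prints 0.01, 0.0095, ~0.006, ~0.0017
    print('{}: {}'.format(k, base_lr * 0.95 ** (k // 1000)))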
/chap9/README.md: -------------------------------------------------------------------------------- 1 | ## Step 1 2 | 3 | > python make_noises.py 4 | 5 | ## Step 2 6 | 7 | > python gen_label.py 8 | 9 | ## Step 3 10 | 11 | > python gen_hdf5.py train.txt 12 | > python gen_hdf5.py val.txt 13 | 14 | ## Step 4 15 | 16 | > /path/to/caffe/build/tools/caffe train -solver solver.prototxt 17 | 18 | ## Step 5 19 | > python predict.py test.txt 20 | 21 | ## Visualize Conv1 Kernels 22 | > python visualize_conv1_kernels.py 23 | -------------------------------------------------------------------------------- /chap9/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "RegressionExample" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { 7 | shape: { 8 | dim: 100 9 | dim: 1 10 | dim: 100 11 | dim: 100 12 | } 13 | } 14 | } 15 | layer { 16 | name: "conv1" 17 | type: "Convolution" 18 | bottom: "data" 19 | top: "conv1" 20 | param { 21 | lr_mult: 1 22 | decay_mult: 1 23 | } 24 | param { 25 | lr_mult: 1 26 | decay_mult: 0 27 | } 28 | convolution_param { 29 | num_output: 96 30 | kernel_size: 5 31 | stride: 2 32 | weight_filler { 33 | type: "gaussian" 34 | std: 0.01 35 | } 36 | bias_filler { 37 | type: "constant" 38 | value: 0 39 | } 40 | } 41 | } 42 | layer { 43 | name: "relu1" 44 | type: "ReLU" 45 | bottom: "conv1" 46 | top: "conv1" 47 | } 48 | layer { 49 | name: "pool1" 50 | type: "Pooling" 51 | bottom: "conv1" 52 | top: "pool1" 53 | pooling_param { 54 | pool: MAX 55 | kernel_size: 3 56 | stride: 2 57 | } 58 | } 59 | layer { 60 | name: "conv2" 61 | type: "Convolution" 62 | bottom: "pool1" 63 | top: "conv2" 64 | param { 65 | lr_mult: 1 66 | decay_mult: 1 67 | } 68 | param { 69 | lr_mult: 1 70 | decay_mult: 0 71 | } 72 | convolution_param { 73 | num_output: 96 74 | pad: 2 75 | kernel_size: 3 76 | weight_filler { 77 | type: "gaussian" 78 | std: 0.01 79 | } 80 | bias_filler { 81 | type: "constant" 82 | value: 0 83 | } 84 | } 85 | } 86 | layer { 87 | name: "relu2" 88 | type: "ReLU" 89 | bottom: "conv2" 90 | top: "conv2" 91 | } 92 | layer { 93 | name: "pool2" 94 | type: "Pooling" 95 | bottom: "conv2" 96 | top: "pool2" 97 | pooling_param { 98 | pool: MAX 99 | kernel_size: 3 100 | stride: 2 101 | } 102 | } 103 | layer { 104 | name: "conv3" 105 | type: "Convolution" 106 | bottom: "pool2" 107 | top: "conv3" 108 | param { 109 | lr_mult: 1 110 | decay_mult: 1 111 | } 112 | param { 113 | lr_mult: 1 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 128 118 | pad: 1 119 | kernel_size: 3 120 | weight_filler { 121 | type: "gaussian"
122 | std: 0.01 123 | } 124 | bias_filler { 125 | type: "constant" 126 | value: 0 127 | } 128 | } 129 | } 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | layer { 137 | name: "pool3" 138 | type: "Pooling" 139 | bottom: "conv3" 140 | top: "pool3" 141 | pooling_param { 142 | pool: MAX 143 | kernel_size: 3 144 | stride: 2 145 | } 146 | } 147 | layer { 148 | name: "fc4" 149 | type: "InnerProduct" 150 | bottom: "pool3" 151 | top: "fc4" 152 | param { 153 | lr_mult: 1 154 | decay_mult: 1 155 | } 156 | param { 157 | lr_mult: 1 158 | decay_mult: 0 159 | } 160 | inner_product_param { 161 | num_output: 192 162 | weight_filler { 163 | type: "gaussian" 164 | std: 0.005 165 | } 166 | bias_filler { 167 | type: "constant" 168 | value: 0 169 | } 170 | } 171 | } 172 | layer { 173 | name: "relu4" 174 | type: "ReLU" 175 | bottom: "fc4" 176 | top: "fc4" 177 | } 178 | layer { 179 | name: "drop4" 180 | type: "Dropout" 181 | bottom: "fc4" 182 | top: "fc4" 183 | dropout_param { 184 | dropout_ratio: 0.35 185 | } 186 | } 187 | layer { 188 | name: "fc5" 189 | type: "InnerProduct" 190 | bottom: "fc4" 191 | top: "fc5" 192 | param { 193 | lr_mult: 1 194 | decay_mult: 1 195 | } 196 | param { 197 | lr_mult: 1 198 | decay_mult: 0 199 | } 200 | inner_product_param { 201 | num_output: 2 202 | weight_filler { 203 | type: "gaussian" 204 | std: 0.005 205 | } 206 | bias_filler { 207 | type: "constant" 208 | value: 0 209 | } 210 | } 211 | } 212 | layer { 213 | name: "sigmoid5" 214 | type: "Sigmoid" 215 | bottom: "fc5" 216 | top: "pred" 217 | } 218 | -------------------------------------------------------------------------------- /chap9/gen_hdf5.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import h5py 5 | 6 | IMAGE_SIZE = (100, 100) 7 | MEAN_VALUE = 128 8 | 9 | filename = sys.argv[1] 10 | setname, ext = filename.split('.') 11 | 12 | with open(filename, 'r') as f: 13 | lines = f.readlines() 14 | 15 | np.random.shuffle(lines) 16 | 17 | sample_size = len(lines) 18 | imgs = np.zeros((sample_size, 1,) + IMAGE_SIZE, dtype=np.float32) 19 | freqs = np.zeros((sample_size, 2), dtype=np.float32) 20 | 21 | h5_filename = '{}.h5'.format(setname) 22 | with h5py.File(h5_filename, 'w') as h: 23 | for i, line in enumerate(lines): 24 | image_name, fx, fy = line[:-1].split() 25 | img = plt.imread(image_name)[:, :, 0].astype(np.float32) 26 | img = img.reshape((1, )+img.shape) 27 | img -= MEAN_VALUE 28 | imgs[i] = img 29 | freqs[i] = [float(fx), float(fy)] 30 | if (i+1) % 1000 == 0: 31 | print('Processed {} images!'.format(i+1)) 32 | h.create_dataset('data', data=imgs) 33 | h.create_dataset('freq', data=freqs) 34 | 35 | with open('{}_h5.txt'.format(setname), 'w') as f: 36 | f.write(h5_filename) 37 | -------------------------------------------------------------------------------- /chap9/gen_label.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | filename2score = lambda x: x[:x.rfind('.')].split('_')[-2:] 4 | 5 | filenames = os.listdir('samples') 6 | 7 | with open('train.txt', 'w') as f_train_txt: 8 | for filename in filenames[:50000]: 9 | fx, fy = filename2score(filename) 10 | line = 'samples/{} {} {}\n'.format(filename, fx, fy) 11 | f_train_txt.write(line) 12 | 13 | with open('val.txt', 'w') as f_val_txt: 14 | for filename in filenames[50000:60000]: 15 | fx, fy = filename2score(filename) 16 | line = 'samples/{} {} 
{}\n'.format(filename, fx, fy) 17 | f_val_txt.write(line) 18 | 19 | with open('test.txt', 'w') as f_test_txt: 20 | for filename in filenames[60000:]: 21 | line = 'samples/{}\n'.format(filename) 22 | f_test_txt.write(line) 23 | -------------------------------------------------------------------------------- /chap9/make_noises.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import datetime 4 | import cv2 5 | 6 | from multiprocessing import Process, cpu_count 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | H_IMG, W_IMG = 100, 100 12 | SAMPLE_SIZE = 70000 13 | SAMPLES_DIR = 'samples' 14 | 15 | def make_noise(index): 16 | h = np.random.randint(1, H_IMG) 17 | w = np.random.randint(1, W_IMG) 18 | noise = np.random.random((h, w)) 19 | noisy_img = cv2.resize(noise, (H_IMG, W_IMG), interpolation=cv2.INTER_CUBIC) 20 | fx = float(w) / float(W_IMG) 21 | fy = float(h) / float(H_IMG) 22 | filename = '{}/{:0>5d}_{}_{}.jpg'.format(SAMPLES_DIR, index, fx, fy) 23 | plt.imsave(filename, noisy_img, cmap='gray') 24 | 25 | def make_noises(i0, i1): 26 | np.random.seed(datetime.datetime.now().microsecond) 27 | for i in xrange(i0, i1): 28 | make_noise(i) 29 | print('Noises from {} to {} are made!'.format(i0+1, i1)) 30 | sys.stdout.flush() 31 | 32 | def main(): 33 | cmd = 'mkdir -p {}'.format(SAMPLES_DIR) 34 | os.system(cmd) 35 | n_procs = cpu_count() 36 | 37 | print('Making noises with {} processes ...'.format(n_procs)) 38 | length = float(SAMPLE_SIZE)/float(n_procs) 39 | indices = [int(round(i * length)) for i in range(n_procs + 1)] 40 | processes = [Process(target=make_noises, args=(indices[i], indices[i+1])) for i in range(n_procs)] 41 | 42 | for p in processes: 43 | p.start() 44 | 45 | for p in processes: 46 | p.join() 47 | 48 | print('Done!') 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /chap9/predict.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | sys.path.append('/path/to/caffe/python') 4 | import caffe 5 | 6 | WEIGHTS_FILE = 'freq_regression_iter_10000.caffemodel' 7 | DEPLOY_FILE = 'deploy.prototxt' 8 | MEAN_VALUE = 128 9 | 10 | #caffe.set_mode_cpu() 11 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST) 12 | 13 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) 14 | transformer.set_transpose('data', (2,0,1)) 15 | transformer.set_mean('data', np.array([MEAN_VALUE])) 16 | transformer.set_raw_scale('data', 255) 17 | 18 | image_list = sys.argv[1] 19 | 20 | batch_size = net.blobs['data'].data.shape[0] 21 | with open(image_list, 'r') as f: 22 | i = 0 23 | filenames = [] 24 | for line in f.readlines(): 25 | filename = line[:-1] 26 | filenames.append(filename) 27 | image = caffe.io.load_image(filename, False) 28 | transformed_image = transformer.preprocess('data', image) 29 | net.blobs['data'].data[i, ...] 
= transformed_image 30 | i += 1 31 | 32 | if i == batch_size: 33 | output = net.forward() 34 | freqs = output['pred'] 35 | 36 | for filename, (fx, fy) in zip(filenames, freqs): 37 | print('Predicted frequencies for {} is {:.2f} and {:.2f}'.format(filename, fx, fy)) 38 | 39 | i = 0 40 | filenames = [] 41 | -------------------------------------------------------------------------------- /chap9/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "./train_val.prototxt" 2 | test_iter: 200 3 | test_interval: 1000 4 | base_lr: 0.01 5 | lr_policy: "step" 6 | gamma: 0.707 7 | stepsize: 2000 8 | display: 100 9 | max_iter: 10000 10 | momentum: 0.9 11 | weight_decay: 0.00001 12 | snapshot_prefix: "./freq_regression" 13 | solver_mode: GPU 14 | type: "Nesterov" 15 | -------------------------------------------------------------------------------- /chap9/train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "RegressionExample" 2 | layer { 3 | name: "data" 4 | type: "HDF5Data" 5 | top: "data" 6 | top: "freq" 7 | include { 8 | phase: TRAIN 9 | } 10 | hdf5_data_param { 11 | source: "train_h5.txt" 12 | batch_size: 50 13 | } 14 | } 15 | layer { 16 | name: "data" 17 | type: "HDF5Data" 18 | top: "data" 19 | top: "freq" 20 | include { 21 | phase: TEST 22 | } 23 | hdf5_data_param { 24 | source: "val_h5.txt" 25 | batch_size: 50 26 | } 27 | } 28 | layer { 29 | name: "conv1" 30 | type: "Convolution" 31 | bottom: "data" 32 | top: "conv1" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | param { 38 | lr_mult: 1 39 | decay_mult: 0 40 | } 41 | convolution_param { 42 | num_output: 96 43 | kernel_size: 5 44 | stride: 2 45 | weight_filler { 46 | type: "gaussian" 47 | std: 0.01 48 | } 49 | bias_filler { 50 | type: "constant" 51 | value: 0 52 | } 53 | } 54 | } 55 | layer { 56 | name: "relu1" 57 | type: "ReLU" 58 | bottom: "conv1" 59 | top: "conv1" 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 3 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "conv2" 74 | type: "Convolution" 75 | bottom: "pool1" 76 | top: "conv2" 77 | param { 78 | lr_mult: 1 79 | decay_mult: 1 80 | } 81 | param { 82 | lr_mult: 1 83 | decay_mult: 0 84 | } 85 | convolution_param { 86 | num_output: 96 87 | pad: 2 88 | kernel_size: 3 89 | weight_filler { 90 | type: "gaussian" 91 | std: 0.01 92 | } 93 | bias_filler { 94 | type: "constant" 95 | value: 0 96 | } 97 | } 98 | } 99 | layer { 100 | name: "relu2" 101 | type: "ReLU" 102 | bottom: "conv2" 103 | top: "conv2" 104 | } 105 | layer { 106 | name: "pool2" 107 | type: "Pooling" 108 | bottom: "conv2" 109 | top: "pool2" 110 | pooling_param { 111 | pool: MAX 112 | kernel_size: 3 113 | stride: 2 114 | } 115 | } 116 | layer { 117 | name: "conv3" 118 | type: "Convolution" 119 | bottom: "pool2" 120 | top: "conv3" 121 | param { 122 | lr_mult: 1 123 | decay_mult: 1 124 | } 125 | param { 126 | lr_mult: 1 127 | decay_mult: 0 128 | } 129 | convolution_param { 130 | num_output: 128 131 | pad: 1 132 | kernel_size: 3 133 | weight_filler { 134 | type: "gaussian" 135 | std: 0.01 136 | } 137 | bias_filler { 138 | type: "constant" 139 | value: 0 140 | } 141 | } 142 | } 143 | layer { 144 | name: "relu3" 145 | type: "ReLU" 146 | bottom: "conv3" 147 | top: "conv3" 148 | } 149 | layer { 150 | name: "pool3" 151 | type: "Pooling" 152 | bottom: "conv3" 153 | top: "pool3" 154 | pooling_param { 155 | pool: MAX 156 
| kernel_size: 3 157 | stride: 2 158 | } 159 | } 160 | layer { 161 | name: "fc4" 162 | type: "InnerProduct" 163 | bottom: "pool3" 164 | top: "fc4" 165 | param { 166 | lr_mult: 1 167 | decay_mult: 1 168 | } 169 | param { 170 | lr_mult: 1 171 | decay_mult: 0 172 | } 173 | inner_product_param { 174 | num_output: 192 175 | weight_filler { 176 | type: "gaussian" 177 | std: 0.005 178 | } 179 | bias_filler { 180 | type: "constant" 181 | value: 0 182 | } 183 | } 184 | } 185 | layer { 186 | name: "relu4" 187 | type: "ReLU" 188 | bottom: "fc4" 189 | top: "fc4" 190 | } 191 | layer { 192 | name: "drop4" 193 | type: "Dropout" 194 | bottom: "fc4" 195 | top: "fc4" 196 | dropout_param { 197 | dropout_ratio: 0.35 198 | } 199 | } 200 | layer { 201 | name: "fc5" 202 | type: "InnerProduct" 203 | bottom: "fc4" 204 | top: "fc5" 205 | param { 206 | lr_mult: 1 207 | decay_mult: 1 208 | } 209 | param { 210 | lr_mult: 1 211 | decay_mult: 0 212 | } 213 | inner_product_param { 214 | num_output: 2 215 | weight_filler { 216 | type: "gaussian" 217 | std: 0.005 218 | } 219 | bias_filler { 220 | type: "constant" 221 | value: 0 222 | } 223 | } 224 | } 225 | layer { 226 | name: "sigmoid5" 227 | type: "Sigmoid" 228 | bottom: "fc5" 229 | top: "pred" 230 | } 231 | layer { 232 | name: "loss" 233 | type: "EuclideanLoss" 234 | bottom: "pred" 235 | bottom: "freq" 236 | top: "loss" 237 | } 238 | -------------------------------------------------------------------------------- /chap9/visualize_conv1_kernels.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import cv2 5 | sys.path.append('/path/to/caffe/python') 6 | import caffe 7 | 8 | ZOOM_IN_SIZE = 50 9 | PAD_SIZE = 4 10 | 11 | WEIGHTS_FILE = 'freq_regression_iter_10000.caffemodel' 12 | DEPLOY_FILE = 'deploy.prototxt' 13 | 14 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST) 15 | kernels = net.params['conv1'][0].data 16 | 17 | kernels -= kernels.min() 18 | kernels /= kernels.max() 19 | 20 | zoomed_in_kernels = [] 21 | for kernel in kernels: 22 | zoomed_in_kernels.append(cv2.resize(kernel[0], (ZOOM_IN_SIZE, ZOOM_IN_SIZE), interpolation=cv2.INTER_NEAREST)) 23 | 24 | # plot the 96 conv1 kernels as an 8x12 grid of squares 25 | half_pad = PAD_SIZE // 2 # integer division: pad widths and slice indices must be ints 26 | padded_size = ZOOM_IN_SIZE+PAD_SIZE 27 | padding = ((0, 0), (half_pad, half_pad), (half_pad, half_pad)) 28 | 29 | padded_kernels = np.pad(zoomed_in_kernels, padding, 'constant', constant_values=1) 30 | padded_kernels = padded_kernels.reshape(8, 12, padded_size, padded_size).transpose(0, 2, 1, 3) 31 | kernels_img = padded_kernels.reshape((8*padded_size, 12*padded_size))[half_pad:-half_pad, half_pad: -half_pad] 32 | 33 | plt.imshow(kernels_img, cmap='gray', interpolation='nearest') 34 | plt.axis('off') 35 | 36 | plt.show() 37 | -------------------------------------------------------------------------------- /errata.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/errata.pdf -------------------------------------------------------------------------------- /random_bonus/README.md: -------------------------------------------------------------------------------- 1 | # Assorted content unrelated to the book itself, though possibly related in the techniques involved 2 | 3 | ## [Generating Adversarial Examples with Caffe](https://github.com/frombeijingwithlove/dlcv_for_beginners/blob/master/random_bonus/adversarial_example_caffe) 4 | ## [GAN and Conditional
GAN to Generate 2D Samples](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/random_bonus/gan_n_cgan_2d_example) 5 | ## [Generating Mosaics for NSFW Images with Yahoo!'s Open NSFW](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/random_bonus/generate_mosaic_for_porno_images) 6 | ## [A U-Net Implementation in PyTorch](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/random_bonus/image-segmentation(updating)) 7 | ## [Model Fusion in Caffe](https://github.com/frombeijingwithlove/dlcv_for_beginners/tree/master/random_bonus/multiple_models_fusion_caffe) 8 | 9 | -------------------------------------------------------------------------------- /random_bonus/adversarial_example_caffe/README.md: -------------------------------------------------------------------------------- 1 | ## Generating Adversarial Examples 2 | Blog (in Chinese): 3 | [Generating Adversarial Examples with Caffe](https://zhuanlan.zhihu.com/p/26122612) 4 | 5 | ## step 1 6 | > ./download-squeezenet-v1.0-weights.sh 7 | 8 | to download weights for SqueezeNet v1.0. 9 | 10 | ## step 2 11 | 12 | > python adversarial_example_demo.py little_white_dog.jpg 13 | 14 | to check the demo results. -------------------------------------------------------------------------------- /random_bonus/adversarial_example_caffe/adversarial_example_demo.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from operator import itemgetter 3 | import numpy 4 | from matplotlib import pyplot 5 | sys.path.append('/path/to/caffe/python') 6 | import caffe 7 | 8 | 9 | def make_n_test_adversarial_example( 10 | img, net, transformer, epsilon, 11 | data_blob='data', prob_blob='prob', 12 | label_index=None, top_k=5): 13 | 14 | # Load image & forward 15 | transformed_img = transformer.preprocess(data_blob, img) 16 | net.blobs[data_blob].data[0] = transformed_img 17 | net.forward() 18 | probs = [x for x in enumerate(net.blobs[prob_blob].data.flatten())] 19 | num_classes = len(probs) 20 | sorted_probs = sorted(probs, key=itemgetter(1), reverse=True) 21 | top_preds = sorted_probs[:top_k] 22 | pred = sorted_probs[0][0] 23 | 24 | # if label_index is set, 25 | # generate an adversarial example toward the label, 26 | # else 27 | # reduce the probability of the predicted label 28 | net.blobs[prob_blob].diff[...] = 0 29 | if type(label_index) is int and 0 <= label_index < num_classes: 30 | net.blobs[prob_blob].diff[0][label_index] = 1. 31 | else: 32 | net.blobs[prob_blob].diff[0][pred] = -1. 33 | 34 | # generate attack image with fast gradient sign method 35 | diffs = net.backward() 36 | diff_sign_mat = numpy.sign(diffs[data_blob]) 37 | adversarial_noise = epsilon * diff_sign_mat 38 | 39 | # clip exceeded values 40 | attack_hwc = transformer.deprocess(data_blob, transformed_img + adversarial_noise[0]) 41 | attack_hwc[attack_hwc > 1] = 1. 42 | attack_hwc[attack_hwc < 0] = 0.
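# note: deprocess() maps the perturbed blob back to an RGB image in [0, 1], so the clipping above keeps the adversarial example a valid image; it is then preprocess()-ed again below for the second forward pass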
43 | attack_img = transformer.preprocess(data_blob, attack_hwc) 44 | 45 | net.blobs[data_blob].data[0] = attack_img 46 | net.forward() 47 | probs = [x for x in enumerate(net.blobs[prob_blob].data.flatten())] 48 | sorted_probs = sorted(probs, key=itemgetter(1), reverse=True) 49 | top_attacked_preds = sorted_probs[:top_k] 50 | 51 | return attack_hwc, top_preds, top_attacked_preds 52 | 53 | 54 | def visualize_attack(title, original_img, attack_img, original_preds, attacked_preds, labels): 55 | pred = original_preds[0][0] 56 | attacked_pred = attacked_preds[0][0] 57 | k = len(original_preds) 58 | fig_name = '{}: {} to {}'.format(title, labels[pred], labels[attacked_pred]) 59 | 60 | pyplot.figure(fig_name) 61 | for img, plt0, plt1, preds in [ 62 | (original_img, 231, 234, original_preds), 63 | (attack_img, 233, 236, attacked_preds) 64 | ]: 65 | pyplot.subplot(plt0) 66 | pyplot.axis('off') 67 | pyplot.imshow(img) 68 | ax = pyplot.subplot(plt1) 69 | pyplot.axis('off') 70 | ax.set_xlim([0, 2]) 71 | bars = ax.barh(range(k-1, -1, -1), [x[1] for x in preds]) 72 | for i, bar in enumerate(bars): 73 | x_loc = bar.get_x() + bar.get_width() 74 | y_loc = k - i - 1 75 | label = labels[preds[i][0]] 76 | ax.text(x_loc, y_loc, '{}: {:.2f}%'.format(label, preds[i][1]*100)) 77 | 78 | pyplot.subplot(232) 79 | pyplot.axis('off') 80 | noise = attack_img - original_img 81 | pyplot.imshow(255 * noise) 82 | 83 | 84 | if __name__ == '__main__': 85 | # path to test image 86 | image_path = sys.argv[1] 87 | 88 | # model to attack 89 | model_definition = 'squeezenet-v1.0-deploy-with-force-backward.prototxt' 90 | model_weights = 'squeezenet_v1.0.caffemodel' 91 | channel_means = numpy.array([104., 117., 123.]) 92 | 93 | # initialize net 94 | net = caffe.Net(model_definition, model_weights, caffe.TEST) 95 | n_channels, height, width = net.blobs['data'].shape[-3:] 96 | net.blobs['data'].reshape(1, n_channels, height, width) 97 | 98 | # initialize transformer 99 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) 100 | transformer.set_transpose('data', (2, 0, 1)) 101 | transformer.set_mean('data', channel_means) 102 | transformer.set_raw_scale('data', 255) 103 | transformer.set_channel_swap('data', (2, 1, 0)) 104 | 105 | # load labels from imagenet synset words 106 | with open('synset_words.txt', 'r') as f: 107 | labels = [x.rstrip()[x.find(' '):].split(',')[0] for x in f.readlines()] 108 | 109 | # load image 110 | img = caffe.io.load_image(image_path) 111 | 112 | examples = [ 113 | (None, 1.0), # make an adversarial example to reduce the predicted probability 114 | (296, 1.0), # make an adversarial example toward ice bear(296) 115 | (9, 1.0), # make an adversarial example toward ostrich(9) 116 | (9, 2.0), # make an adversarial example toward ostrich(9) with stronger noise 117 | (9, 6.0), # make an adversarial example toward ostrich(9) with very strong noise 118 | (9, 18.0), # make an adversarial example toward ostrich(9) with overly strong noise 119 | (752, 1.0), # make an adversarial example toward racket(752) 120 | (752, 2.0), # make an adversarial example toward racket(752) with stronger noise 121 | (752, 6.0), # make an adversarial example toward racket(752) with very strong noise 122 | (752, 18.0), # make an adversarial example toward racket(752) with overly strong noise 123 | ] 124 | 125 | for i, (label_index, epsilon) in enumerate(examples): 126 | attack_img, original_preds, attacked_preds = \ 127 | make_n_test_adversarial_example(img, net, transformer, epsilon, label_index=label_index) 128 |
visualize_attack('example{}'.format(i), img, attack_img, original_preds, attacked_preds, labels) 129 | 130 | # try to make an adversarial example toward racket(752) with epsilon=0.1, iterate 10 times 131 | attack_img, original_preds, attacked_preds = \ 132 | make_n_test_adversarial_example(img, net, transformer, 0.1, label_index=752) 133 | for i in range(9): 134 | attack_img, _, attacked_preds = \ 135 | make_n_test_adversarial_example(attack_img, net, transformer, 0.1, label_index=752) 136 | visualize_attack('racket_iterative', img, attack_img, original_preds, attacked_preds, labels) 137 | 138 | pyplot.show() 139 | -------------------------------------------------------------------------------- /random_bonus/adversarial_example_caffe/download-squeezenet-v1.0-weights.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | wget https://github.com/DeepScale/SqueezeNet/raw/master/SqueezeNet_v1.0/squeezenet_v1.0.caffemodel 4 | 5 | -------------------------------------------------------------------------------- /random_bonus/adversarial_example_caffe/little_white_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/adversarial_example_caffe/little_white_dog.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/README.md: -------------------------------------------------------------------------------- 1 | ## Generative Adversarial Networks (GANs) with 2D Samples 2 | Blog (in Chinese): 3 | [A Small Example of Generating 2D Samples with GAN](https://zhuanlan.zhihu.com/p/27343585) 4 | Inspired by & based on [Dev Nag's GAN example](https://github.com/devnag/pytorch-generative-adversarial-networks): 5 | 1) Use the batch size instead of the cardinality to achieve better convergence; the original version actually feeds the discriminator a 100-dimensional (the default cardinality) Gaussian distribution, so its convergence is **BAD**. 6 | 2) Use 2D samples, with visualization of training. 7 | 3) Demo of conditional GAN. 8 | 4) GPU support. 9 | 10 | 11 | ## Introduction 12 | Play with GANs to generate 2D samples whose probability density function (PDF) you define with a grayscale image. 13 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/example_z.jpg) 14 | 15 | ## 2D Sampling 16 | > python sampler.py 17 | 18 | This demos 10,000 samples drawn from the PDF defined by a grayscale image.
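To draw samples from your own code, the sampler can also be called directly; a minimal sketch (using the bundled `inputs/binary.jpg` as the density image, mirroring the `__main__` block of sampler.py):

```python
from skimage import io
from sampler import generate_lut, sample_2d

# build the lookup tables from a grayscale density image, then draw samples
density_img = io.imread('inputs/binary.jpg', True)  # True -> load as grayscale
lut_2d = generate_lut(density_img)
samples = sample_2d(lut_2d, 10000)  # ndarray of shape (10000, 2), values in [0, 1]
```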
19 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/test_2d_sampling_batman.png) 20 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/test_2d_sampling_binary.png) 21 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/test_2d_sampling_triangle.png) 22 | 23 | ## GAN 24 | > python gan_demo.py inputs/zig.jpg 25 | 26 | Training is visualized as follows: 27 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_zig.gif) 28 | 29 | More examples: 30 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_Z.gif) 31 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_triangle.gif) 32 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_circle.gif) 33 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_random.gif) 34 | 35 | ## Conditional GAN 36 | For more complex distributions, a conditional GAN works much better. This demo reads distributions from multiple density images (PDFs), encoding the condition as a one-hot vector. 37 | 38 | > python cgan_demo.py inputs/binary 39 | 40 | Training is visualized as follows: 41 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/cgan_binary.gif) 42 | Compared to the vanilla GAN version: 43 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_binary.gif) 44 | 45 | More examples: 46 | Vortex with C-GAN 47 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/cgan_vortex.gif) 48 | 49 | Vortex with vanilla GAN 50 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_vortex.gif) 51 | 52 | Pentagram with C-GAN 53 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/cgan_penta.gif) 54 | 55 | Pentagram with vanilla GAN 56 | ![image](https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/gan_n_cgan_2d/gan_penta.gif) 57 | 58 | ## Latent space dimensionality / model complexity / learning rates / ...
59 | > python gan_demo.py -h 60 | 61 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/argparser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch.optim as optim 4 | 5 | OPTIMIZERS = { 6 | 'adadelta': optim.Adadelta, 7 | 'adam': optim.Adam, 8 | 'rmsprop': optim.RMSprop, 9 | 'sgd': optim.SGD 10 | } 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser( 15 | description='A Simple Demo of Generative Adversarial Networks with 2D Samples', 16 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 17 | 18 | parser.add_argument('input_path', 19 | help='Image or directory containing images to define distribution') 20 | 21 | parser.add_argument('--z_dim', 22 | help='Dimensionality of latent space', 23 | type=int, default=2) 24 | parser.add_argument('--iterations', 25 | help='Num of training iterations', 26 | type=int, default=2000) 27 | parser.add_argument('--batch_size', 28 | help='Total batch size (split across conditions for C-GAN)', 29 | type=int, default=2000) 30 | parser.add_argument('--optimizer', 31 | help='Optimizer: Adadelta/Adam/RMSprop/SGD', 32 | type=str, default='Adadelta') 33 | parser.add_argument('--d_lr', 34 | help='Learning rate of discriminator, for Adadelta it is the base learning rate', 35 | type=float, default=1) 36 | parser.add_argument('--g_lr', 37 | help='Learning rate of generator, for Adadelta it is the base learning rate', 38 | type=float, default=1) 39 | parser.add_argument('--d_steps', 40 | help='Steps of the discriminator in each iteration', 41 | type=int, default=3) 42 | parser.add_argument('--g_steps', 43 | help='Steps of the generator in each iteration', 44 | type=int, default=1) 45 | parser.add_argument('--d_hidden_size', 46 | help='Num of hidden units in discriminator', 47 | type=int, default=100) 48 | parser.add_argument('--g_hidden_size', 49 | help='Num of hidden units in generator', 50 | type=int, default=50) 51 | parser.add_argument('--display_interval', 52 | help='Interval of iterations to display/export images', 53 | type=int, default=10) 54 | parser.add_argument('--no_display', 55 | help='Do not show plots during training', action='store_true') 56 | parser.add_argument('--export', 57 | help='Export images', action='store_true') 58 | parser.add_argument('--cpu', 59 | help='Set to CPU mode', action='store_true') 60 | 61 | args = parser.parse_args() 62 | args.input_path = args.input_path.rstrip(os.sep) 63 | args.optimizer = OPTIMIZERS[args.optimizer.lower()] 64 | 65 | return args 66 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/cgan_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Conditional Generative Adversarial Networks (C-GAN) example with 2D samples in PyTorch.
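# Layout of the conditional inputs used throughout this script:
#   generator input: [c_dim one-hot condition | z_dim latent vector]
#   discriminator input: [c_dim one-hot condition | 2-D sample]
# Each density image in input_path defines one condition; the batch is split
# across conditions in proportion to each image's total pixel mass (see c_indices).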
3 | import os 4 | import numpy 5 | from skimage import io 6 | import torch 7 | import torch.nn as nn 8 | from torch.autograd import Variable 9 | from sampler import generate_lut, sample_2d 10 | from visualizer import CGANDemoVisualizer 11 | from argparser import parse_args 12 | from networks import SimpleMLP 13 | 14 | DIMENSION = 2 15 | 16 | args = parse_args() 17 | cuda = False if args.cpu else True 18 | bs = args.batch_size 19 | z_dim = args.z_dim 20 | 21 | image_paths = [os.sep.join([args.input_path, x]) for x in os.listdir(args.input_path)] 22 | density_imgs = [io.imread(x, True) for x in image_paths] 23 | luts_2d = [generate_lut(x) for x in density_imgs] 24 | # Sampling is based on visual density; a batch size that is too small may make conditional training fail 25 | pix_sums = [numpy.sum(x) for x in density_imgs] 26 | total_pix_sums = numpy.sum(pix_sums) 27 | c_indices = [0] + [int(sum(pix_sums[:i+1])/total_pix_sums*bs+0.5) for i in range(len(pix_sums)-1)] + [bs] 28 | 29 | c_dim = len(luts_2d) # Dimensionality of condition labels <--> number of images 30 | 31 | visualizer = CGANDemoVisualizer('Conditional GAN 2D Example Visualization of {}'.format(args.input_path)) 32 | 33 | generator = SimpleMLP(input_size=z_dim+c_dim, hidden_size=args.g_hidden_size, output_size=DIMENSION) 34 | discriminator = SimpleMLP(input_size=DIMENSION+c_dim, hidden_size=args.d_hidden_size, output_size=1) 35 | 36 | if cuda: 37 | generator.cuda() 38 | discriminator.cuda() 39 | criterion = nn.BCELoss() 40 | 41 | d_optimizer = args.optimizer(discriminator.parameters(), lr=args.d_lr) 42 | g_optimizer = args.optimizer(generator.parameters(), lr=args.g_lr) 43 | 44 | y = numpy.zeros((bs, c_dim)) 45 | for i in range(c_dim): 46 | y[c_indices[i]:c_indices[i + 1], i] = 1 # conditional labels, one-hot encoding 47 | y = Variable(torch.Tensor(y)) 48 | if cuda: 49 | y = y.cuda() 50 | 51 | for train_iter in range(args.iterations): 52 | for d_index in range(args.d_steps): 53 | # 1.
Train D on real+fake 54 | discriminator.zero_grad() 55 | 56 | # 1A: Train D on real samples with conditions 57 | real_samples = numpy.zeros((bs, DIMENSION)) 58 | for i in range(c_dim): 59 | real_samples[c_indices[i]:c_indices[i+1], :] = sample_2d(luts_2d[i], c_indices[i+1]-c_indices[i]) 60 | 61 | # the first c_dim dimensions are the condition inputs, the last 2 dimensions are samples 62 | real_samples = Variable(torch.Tensor(real_samples)) 63 | if cuda: 64 | real_samples = real_samples.cuda() 65 | d_real_data = torch.cat([y, real_samples], 1) 66 | if cuda: 67 | d_real_data = d_real_data.cuda() 68 | d_real_decision = discriminator(d_real_data) 69 | labels = Variable(torch.ones(bs)) 70 | if cuda: 71 | labels = labels.cuda() 72 | d_real_loss = criterion(d_real_decision, labels) # ones = true 73 | 74 | # 1B: Train D on fake 75 | latent_samples = Variable(torch.randn(bs, z_dim)) 76 | if cuda: 77 | latent_samples = latent_samples.cuda() 78 | # the first c_dim dimensions are the condition inputs, the last z_dim dimensions are latent samples 79 | d_gen_input = torch.cat([y, latent_samples], 1) 80 | d_fake_data = generator(d_gen_input).detach() # detach to avoid training G on these labels 81 | conditional_d_fake_data = torch.cat([y, d_fake_data], 1) 82 | if cuda: 83 | conditional_d_fake_data = conditional_d_fake_data.cuda() 84 | d_fake_decision = discriminator(conditional_d_fake_data) 85 | labels = Variable(torch.zeros(bs)) 86 | if cuda: 87 | labels = labels.cuda() 88 | d_fake_loss = criterion(d_fake_decision, labels) # zeros = fake 89 | 90 | d_loss = d_real_loss + d_fake_loss 91 | d_loss.backward() 92 | 93 | d_optimizer.step() # Only optimizes D's parameters; changes based on stored gradients from backward() 94 | 95 | for g_index in range(args.g_steps): 96 | # 2. Train G on D's response (but DO NOT train D on these labels) 97 | generator.zero_grad() 98 | 99 | latent_samples = Variable(torch.randn(bs, z_dim)) 100 | if cuda: 101 | latent_samples = latent_samples.cuda() 102 | g_gen_input = torch.cat([y, latent_samples], 1) 103 | g_fake_data = generator(g_gen_input) 104 | conditional_g_fake_data = torch.cat([y, g_fake_data], 1) 105 | g_fake_decision = discriminator(conditional_g_fake_data) 106 | labels = Variable(torch.ones(bs)) 107 | if cuda: 108 | labels = labels.cuda() 109 | g_loss = criterion(g_fake_decision, labels) # we want to fool, so pretend it's all genuine 110 | 111 | g_loss.backward() 112 | g_optimizer.step() # Only optimizes G's parameters 113 | 114 | if train_iter % args.display_interval == 0: 115 | loss_d_real = d_real_loss.data.cpu().numpy()[0] if cuda else d_real_loss.data.numpy()[0] 116 | loss_d_fake = d_fake_loss.data.cpu().numpy()[0] if cuda else d_fake_loss.data.numpy()[0] 117 | loss_g = g_loss.data.cpu().numpy()[0] if cuda else g_loss.data.numpy()[0] 118 | 119 | msg = 'Iteration {}: D_loss(real/fake): {:.6g}/{:.6g} G_loss: {:.6g}'.format(train_iter, loss_d_real, loss_d_fake, loss_g) 120 | print(msg) 121 | 122 | real_samples_with_y = d_real_data.data.cpu().numpy() if cuda else d_real_data.data.numpy() 123 | gen_samples_with_y = conditional_g_fake_data.data.cpu().numpy() if cuda else conditional_g_fake_data.data.numpy() 124 | if args.no_display: 125 | visualizer.draw(real_samples_with_y, gen_samples_with_y, msg, show=False) 126 | else: 127 | visualizer.draw(real_samples_with_y, gen_samples_with_y, msg) 128 | 129 | if args.export: 130 | filename = args.input_path.split(os.sep)[-1] 131 | output_dir = 'cgan_training_{}'.format(filename) 132 | os.system('mkdir -p {}'.format(output_dir)) 133 |
export_filepath = os.sep.join([output_dir, 'iter_{:0>6d}.png'.format(train_iter)]) 134 | visualizer.savefig(export_filepath) 135 | 136 | if not args.no_display: 137 | visualizer.show() 138 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/gan_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Generative Adversarial Networks (GAN) example with 2D samples in PyTorch. 3 | import os 4 | from skimage import io 5 | import torch 6 | import torch.nn as nn 7 | from torch.autograd import Variable 8 | from sampler import generate_lut, sample_2d 9 | from visualizer import GANDemoVisualizer 10 | from argparser import parse_args 11 | from networks import SimpleMLP 12 | 13 | DIMENSION = 2 14 | 15 | args = parse_args() 16 | cuda = False if args.cpu else True 17 | bs = args.batch_size 18 | z_dim = args.z_dim 19 | 20 | density_img = io.imread(args.input_path, True) 21 | lut_2d = generate_lut(density_img) 22 | 23 | visualizer = GANDemoVisualizer('GAN 2D Example Visualization of {}'.format(args.input_path)) 24 | 25 | generator = SimpleMLP(input_size=z_dim, hidden_size=args.g_hidden_size, output_size=DIMENSION) 26 | discriminator = SimpleMLP(input_size=DIMENSION, hidden_size=args.d_hidden_size, output_size=1) 27 | 28 | if cuda: 29 | generator.cuda() 30 | discriminator.cuda() 31 | criterion = nn.BCELoss() 32 | 33 | d_optimizer = args.optimizer(discriminator.parameters(), lr=args.d_lr) 34 | g_optimizer = args.optimizer(generator.parameters(), lr=args.g_lr) 35 | 36 | for train_iter in range(args.iterations): 37 | for d_index in range(args.d_steps): 38 | # 1. Train D on real+fake 39 | discriminator.zero_grad() 40 | 41 | # 1A: Train D on real 42 | real_samples = sample_2d(lut_2d, bs) 43 | d_real_data = Variable(torch.Tensor(real_samples)) 44 | if cuda: 45 | d_real_data = d_real_data.cuda() 46 | d_real_decision = discriminator(d_real_data) 47 | labels = Variable(torch.ones(bs)) 48 | if cuda: 49 | labels = labels.cuda() 50 | d_real_loss = criterion(d_real_decision, labels) # ones = true 51 | 52 | # 1B: Train D on fake 53 | latent_samples = torch.randn(bs, z_dim) 54 | d_gen_input = Variable(latent_samples) 55 | if cuda: 56 | d_gen_input = d_gen_input.cuda() 57 | d_fake_data = generator(d_gen_input).detach() # detach to avoid training G on these labels 58 | d_fake_decision = discriminator(d_fake_data) 59 | labels = Variable(torch.zeros(bs)) 60 | if cuda: 61 | labels = labels.cuda() 62 | d_fake_loss = criterion(d_fake_decision, labels) # zeros = fake 63 | 64 | d_loss = d_real_loss + d_fake_loss 65 | d_loss.backward() 66 | 67 | d_optimizer.step() # Only optimizes D's parameters; changes based on stored gradients from backward() 68 | 69 | for g_index in range(args.g_steps): 70 | # 2.
Train G on D's response (but DO NOT train D on these labels) 71 | generator.zero_grad() 72 | 73 | latent_samples = torch.randn(bs, z_dim) 74 | g_gen_input = Variable(latent_samples) 75 | if cuda: 76 | g_gen_input = g_gen_input.cuda() 77 | g_fake_data = generator(g_gen_input) 78 | g_fake_decision = discriminator(g_fake_data) 79 | labels = Variable(torch.ones(bs)) 80 | if cuda: 81 | labels = labels.cuda() 82 | g_loss = criterion(g_fake_decision, labels) # we want to fool, so pretend it's all genuine 83 | 84 | g_loss.backward() 85 | g_optimizer.step() # Only optimizes G's parameters 86 | 87 | if train_iter % args.display_interval == 0: 88 | loss_d_real = d_real_loss.data.cpu().numpy()[0] if cuda else d_real_loss.data.numpy()[0] 89 | loss_d_fake = d_fake_loss.data.cpu().numpy()[0] if cuda else d_fake_loss.data.numpy()[0] 90 | loss_g = g_loss.data.cpu().numpy()[0] if cuda else g_loss.data.numpy()[0] 91 | 92 | msg = 'Iteration {}: D_loss(real/fake): {:.6g}/{:.6g} G_loss: {:.6g}'.format(train_iter, loss_d_real, loss_d_fake, loss_g) 93 | print(msg) 94 | 95 | gen_samples = g_fake_data.data.cpu().numpy() if cuda else g_fake_data.data.numpy() 96 | 97 | if args.no_display: 98 | visualizer.draw(real_samples, gen_samples, msg, show=False) 99 | else: 100 | visualizer.draw(real_samples, gen_samples, msg) 101 | 102 | if args.export: 103 | filename = args.input_path.split(os.sep)[-1] 104 | output_dir = 'gan_training_{}'.format(filename[:filename.rfind('.')]) 105 | os.system('mkdir -p {}'.format(output_dir)) 106 | export_filepath = os.sep.join([output_dir, 'iter_{:0>6d}.png'.format(train_iter)]) 107 | visualizer.savefig(export_filepath) 108 | 109 | if not args.no_display: 110 | visualizer.show() 111 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/U.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/U.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/Z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/Z.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/batman.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/batman.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/binary.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/binary.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/binary/0.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/binary/0.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/binary/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/binary/1.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/circle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/circle.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/dumbbell.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/dumbbell.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta/0.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta/1.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta/2.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta/3.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/penta/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/penta/4.jpg 
-------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/random.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/random.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/triangle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/triangle.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/vortex.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/vortex.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/vortex/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/vortex/0.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/vortex/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/vortex/1.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/vortex/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/vortex/2.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/inputs/zig.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/random_bonus/gan_n_cgan_2d_example/inputs/zig.jpg -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/networks.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class SimpleMLP(nn.Module): 6 | def __init__(self, input_size, hidden_size, output_size): 7 | super(SimpleMLP, self).__init__() 8 | self.map1 = nn.Linear(input_size, hidden_size) 9 | self.map2 = nn.Linear(hidden_size, output_size) 10 | 11 | def forward(self, x): 12 | x = F.leaky_relu(self.map1(x), 0.1) 13 | return F.sigmoid(self.map2(x)) 14 | 15 | class DeepMLP(nn.Module): 16 | def __init__(self, input_size, hidden_size, output_size): 17 | super(DeepMLP, self).__init__() 18 | self.map1 = nn.Linear(input_size, hidden_size) 19 | self.map2 = nn.Linear(hidden_size, hidden_size) 20 | self.map3 = nn.Linear(hidden_size, output_size) 21 | 22 | def 
forward(self, x): 23 | x = F.leaky_relu(self.map1(x), 0.1) 24 | x = F.leaky_relu(self.map2(x), 0.1) 25 | return F.sigmoid(self.map3(x)) 26 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/sampler.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import numpy 3 | from skimage import transform 4 | 5 | EPS = 1e-66 6 | RESOLUTION = 0.001 7 | num_grids = int(1/RESOLUTION+0.5) 8 | 9 | def generate_lut(img): 10 | """ 11 | linear approximation of the marginal CDF in y and the per-row conditional CDFs in x 12 | :param img: grayscale image defining the density 13 | :return: y_lut, x_luts 14 | """ 15 | density_img = transform.resize(img, (num_grids, num_grids)) 16 | x_accumulation = numpy.sum(density_img, axis=1) 17 | sum_xy = numpy.sum(x_accumulation) 18 | y_cdf_of_accumulated_x = [[0., 0.]] 19 | accumulated = 0 20 | for ir, i in enumerate(range(num_grids-1, -1, -1)): 21 | accumulated += x_accumulation[i] 22 | if accumulated == 0: 23 | y_cdf_of_accumulated_x[0][0] = float(ir+1)/float(num_grids) 24 | elif EPS < accumulated < sum_xy - EPS: 25 | y_cdf_of_accumulated_x.append([float(ir+1)/float(num_grids), accumulated/sum_xy]) 26 | else: 27 | break 28 | y_cdf_of_accumulated_x.append([float(ir+1)/float(num_grids), 1.]) 29 | y_cdf_of_accumulated_x = numpy.array(y_cdf_of_accumulated_x) 30 | 31 | x_cdfs = [] 32 | for j in range(num_grids): 33 | x_freq = density_img[num_grids-j-1] 34 | sum_x = numpy.sum(x_freq) 35 | x_cdf = [[0., 0.]] 36 | accumulated = 0 37 | for i in range(num_grids): 38 | accumulated += x_freq[i] 39 | if accumulated == 0: 40 | x_cdf[0][0] = float(i+1) / float(num_grids) 41 | elif EPS < accumulated < sum_xy - EPS: 42 | x_cdf.append([float(i+1)/float(num_grids), accumulated/sum_x]) 43 | else: 44 | break 45 | x_cdf.append([float(i+1)/float(num_grids), 1.]) 46 | if accumulated > EPS: 47 | x_cdf = numpy.array(x_cdf) 48 | x_cdfs.append(x_cdf) 49 | else: 50 | x_cdfs.append(None) 51 | 52 | y_lut = partial(numpy.interp, xp=y_cdf_of_accumulated_x[:, 1], fp=y_cdf_of_accumulated_x[:, 0]) 53 | x_luts = [partial(numpy.interp, xp=x_cdfs[i][:, 1], fp=x_cdfs[i][:, 0]) if x_cdfs[i] is not None else None for i in range(num_grids)] 54 | 55 | return y_lut, x_luts 56 | 57 | def sample_2d(lut, N): 58 | y_lut, x_luts = lut 59 | u_rv = numpy.random.random((N, 2)) 60 | samples = numpy.zeros(u_rv.shape) 61 | for i, (x, y) in enumerate(u_rv): 62 | ys = y_lut(y) 63 | x_bin = int(ys/RESOLUTION) 64 | xs = x_luts[x_bin](x) 65 | samples[i][0] = xs 66 | samples[i][1] = ys 67 | 68 | return samples 69 | 70 | if __name__ == '__main__': 71 | from skimage import io 72 | density_img = io.imread('inputs/random.jpg', True) 73 | lut_2d = generate_lut(density_img) 74 | samples = sample_2d(lut_2d, 10000) 75 | 76 | from matplotlib import pyplot 77 | fig, (ax0, ax1) = pyplot.subplots(ncols=2, figsize=(9, 4)) 78 | fig.canvas.set_window_title('Test 2D Sampling') 79 | ax0.imshow(density_img, cmap='gray') 80 | ax0.xaxis.set_major_locator(pyplot.NullLocator()) 81 | ax0.yaxis.set_major_locator(pyplot.NullLocator()) 82 | 83 | ax1.axis('equal') 84 | ax1.axis([0, 1, 0, 1]) 85 | ax1.plot(samples[:, 0], samples[:, 1], 'k,') 86 | pyplot.show() 87 | -------------------------------------------------------------------------------- /random_bonus/gan_n_cgan_2d_example/visualizer.py: -------------------------------------------------------------------------------- 1 | from itertools import cycle 2 | import numpy 3 | from matplotlib import pyplot 4 | from skimage import filters 5 | 6 | 7 |
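# The visualizers below draw three panels per update: a scatter plot of real
# vs. generated samples, plus two density maps obtained by binning samples into
# an l_kde x l_kde histogram and smoothing it with a Gaussian filter of
# bandwidth bw_kde (a cheap stand-in for kernel density estimation).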
class GANDemoVisualizer: 8 | 9 | def __init__(self, title, l_kde=100, bw_kde=5): 10 | self.title = title 11 | self.l_kde = l_kde 12 | self.resolution = 1. / self.l_kde 13 | self.bw_kde_ = bw_kde 14 | self.fig, self.axes = pyplot.subplots(ncols=3, figsize=(13.5, 4)) 15 | self.fig.canvas.set_window_title(self.title) 16 | 17 | def draw(self, real_samples, gen_samples, msg=None, cmap='hot', pause_time=0.05, max_sample_size=500, show=True): 18 | if msg: 19 | self.fig.suptitle(msg) 20 | ax0, ax1, ax2 = self.axes 21 | 22 | self.draw_samples(ax0, 'real and generated samples', real_samples, gen_samples, max_sample_size) 23 | self.draw_density_estimation(ax1, 'density: real samples', real_samples, cmap) 24 | self.draw_density_estimation(ax2, 'density: generated samples', gen_samples, cmap) 25 | 26 | if show: 27 | pyplot.draw() 28 | pyplot.pause(pause_time) 29 | 30 | @staticmethod 31 | def draw_samples(axis, title, real_samples, generated_samples, max_sample_size): 32 | axis.clear() 33 | axis.set_xlabel(title) 34 | axis.plot(generated_samples[:max_sample_size, 0], generated_samples[:max_sample_size, 1], '.') 35 | axis.plot(real_samples[:max_sample_size, 0], real_samples[:max_sample_size, 1], 'kx') 36 | axis.axis('equal') 37 | axis.axis([0, 1, 0, 1]) 38 | 39 | def draw_density_estimation(self, axis, title, samples, cmap): 40 | axis.clear() 41 | axis.set_xlabel(title) 42 | density_estimation = numpy.zeros((self.l_kde, self.l_kde)) 43 | for x, y in samples: 44 | if 0 < x < 1 and 0 < y < 1: 45 | density_estimation[int((1-y) / self.resolution)][int(x / self.resolution)] += 1 46 | density_estimation = filters.gaussian(density_estimation, self.bw_kde_) 47 | axis.imshow(density_estimation, cmap=cmap) 48 | axis.xaxis.set_major_locator(pyplot.NullLocator()) 49 | axis.yaxis.set_major_locator(pyplot.NullLocator()) 50 | 51 | def savefig(self, filepath): 52 | self.fig.savefig(filepath) 53 | 54 | @staticmethod 55 | def show(): 56 | pyplot.show() 57 | 58 | 59 | class CGANDemoVisualizer(GANDemoVisualizer): 60 | 61 | def __init__(self, title, l_kde=100, bw_kde=5): 62 | GANDemoVisualizer.__init__(self, title, l_kde, bw_kde) 63 | 64 | def draw(self, real_samples, gen_samples, msg=None, cmap='hot', pause_time=0.05, max_sample_size=500, show=True): 65 | if msg: 66 | self.fig.suptitle(msg) 67 | ax0, ax1, ax2 = self.axes 68 | 69 | self.draw_samples(ax0, 'real and generated samples', real_samples, gen_samples, max_sample_size) 70 | self.draw_density_estimation(ax1, 'density: real samples', real_samples[:, -2:], cmap) 71 | self.draw_density_estimation(ax2, 'density: generated samples', gen_samples[:, -2:], cmap) 72 | 73 | if show: 74 | pyplot.draw() 75 | pyplot.pause(pause_time) 76 | 77 | def draw_samples(self, axis, title, real_samples, generated_samples, max_sample_size): 78 | axis.clear() 79 | axis.set_xlabel(title) 80 | g_samples = numpy.copy(generated_samples) 81 | r_samples = numpy.copy(real_samples) 82 | numpy.random.shuffle(g_samples) 83 | numpy.random.shuffle(r_samples) 84 | g_samples = g_samples[:max_sample_size, :] 85 | r_samples = r_samples[:max_sample_size, :] 86 | color_iter = cycle('bgrcmy') 87 | for i in range(g_samples.shape[1]-2): 88 | c = next(color_iter) 89 | samples = g_samples[g_samples[:, i] > 0, :][:, -2:] 90 | axis.plot(samples[:, 0], samples[:, 1], c+'.', markersize=5) 91 | samples = r_samples[r_samples[:, i] > 0, :][:, -2:] 92 | axis.plot(samples[:, 0], samples[:, 1], c+'x', markersize=5) 93 | axis.axis('equal') 94 | axis.axis([0, 1, 0, 1]) 95 | 96 | def savefig(self, filepath): 97 | 
self.fig.savefig(filepath) 98 | 99 | @staticmethod 100 | def show(): 101 | pyplot.show() 102 | -------------------------------------------------------------------------------- /random_bonus/generate_mosaic_for_porno_images/README.md: -------------------------------------------------------------------------------- 1 | ## Generating Mosaics for NSFW Images 2 | Blog (in Chinese): 3 | [Improving Driving Skills: Removing Mosaics and Clothes from Adult Videos with GAN](https://zhuanlan.zhihu.com/p/27199954) 4 | 5 | ## step 1 6 | Download Yahoo's open_nsfw (Not Safe For Work) model: 7 | > ./clone_open_nsfw.sh 8 | 9 | **Note**: in the original open_nsfw, global pooling is performed by the last pooling layer with a kernel size of 7; to support variable input sizes and make the mosaic finer, deploy_global_pooling.prototxt changes 10 | > kernel_size: 7 11 | 12 | to 13 | > global_pooling: true 14 | 15 | ## step 2 16 | 17 | > python gen_mosaic.py [input dir] [output dir] 18 | 19 | The code is adapted from the activation visualization in Chapter 10: [visualize_activation.py](https://github.com/frombeijingwithlove/dlcv_for_beginners/blob/master/chap10/visualize_activation.py). 20 | 21 | ## step 3 (optional) 22 | If the images for pix2pix training have a large aspect ratio and the NSFW regions mostly lie near the image center, consider center-cropping and resizing them to 256x256: 23 | > python crop_n_resize.py [dir_0] [dir_1] ... [dir_n] 256 24 | -------------------------------------------------------------------------------- /random_bonus/generate_mosaic_for_porno_images/clone_open_nsfw.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | git clone https://github.com/yahoo/open_nsfw.git 3 | -------------------------------------------------------------------------------- /random_bonus/generate_mosaic_for_porno_images/crop_n_resize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import sys 4 | 5 | folders = sys.argv[1:-1] 6 | length = int(sys.argv[-1]) 7 | 8 | cnt = 0 9 | for folder in folders: 10 | print('scanning {} ...'.format(folder)) 11 | folder = folder.rstrip('/') 12 | files = os.listdir(folder) 13 | 14 | for img_file in files: 15 | filepath = '{}/{}'.format(folder, img_file) 16 | try: 17 | img = cv2.imread(filepath) 18 | h, w, c = img.shape 19 | except Exception: 20 | print('problematic file:', filepath) 21 | continue 22 | 23 | if img is None: 24 | print('problematic file:', filepath) 25 | continue 26 | elif h == length and w == length: 27 | continue 28 | else: 29 | if h > w: 30 | dl = int((h-w)/2) 31 | if dl > 0: 32 | img = img[dl:-dl, ...] 33 | else: 34 | dl = int((w-h)/2) 35 | if dl > 0: 36 | img = img[:, dl:-dl, ...]
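# the center crop above leaves height and width equal (up to one pixel of rounding), so the resize below no longer distorts the aspect ratio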
37 | img = cv2.resize(img, (length, length)) 38 | cv2.imwrite(filepath, img) 39 | 40 | cnt += 1 41 | if cnt % 100 == 0: 42 | print('{} images processed!'.format(cnt)) 43 | 44 | print('Done!') 45 | 46 | -------------------------------------------------------------------------------- /random_bonus/generate_mosaic_for_porno_images/gen_mosaic.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import numpy as np 4 | import cv2 5 | sys.path.append('/path/to/caffe/python') 6 | import caffe 7 | 8 | WEIGHTS_FILE = 'open_nsfw/nsfw_model/resnet_50_1by2_nsfw.caffemodel' 9 | DEPLOY_FILE = 'deploy_global_pooling.prototxt' 10 | FEATURE_MAPS = 'eltwise_stage3_block2' 11 | FC_LAYER = 'fc_nsfw' 12 | 13 | SHORT_EDGE = 320 14 | MOSAIC_RANGE = [5, 15] 15 | 16 | #caffe.set_mode_cpu() 17 | net = caffe.Net(DEPLOY_FILE, WEIGHTS_FILE, caffe.TEST) 18 | input_dir = sys.argv[1] 19 | output_dir = sys.argv[2] 20 | os.system('mkdir -p {}'.format(output_dir)) 21 | 22 | porno = 1 23 | mask_th = 0.5 24 | 25 | filenames = os.listdir(input_dir) 26 | for i, filename in enumerate(filenames): 27 | filepath = os.sep.join([input_dir, filename]) 28 | 29 | image = cv2.imread(filepath)[:, :, :3] 30 | height, width = image.shape[:2] 31 | 32 | short_edge_image = min(image.shape[:2]) 33 | scale_ratio = float(SHORT_EDGE) / float(short_edge_image) 34 | if scale_ratio < 1: 35 | transformed_image = cv2.resize(image, (0, 0), fx=scale_ratio, fy=scale_ratio) 36 | else: 37 | transformed_image = np.copy(image) 38 | transformed_image = transformed_image.astype(np.float32) 39 | transformed_image -= np.array([104., 117., 123.]) 40 | transformed_image = np.transpose(transformed_image, (2, 0, 1)) 41 | 42 | net.blobs['data'].reshape(1, 3, transformed_image.shape[1], transformed_image.shape[2]) 43 | net.blobs['data'].data[...] 
= transformed_image 44 | 45 | mosaic_size = np.random.randint(MOSAIC_RANGE[0], MOSAIC_RANGE[1]+1) # mosaic block size in [5, 15] 46 | scale_mosaic = 1 / float(mosaic_size) 47 | mosaic_image = cv2.resize(image, (0, 0), fx=scale_mosaic, fy=scale_mosaic) 48 | mosaic_image = cv2.resize(mosaic_image, (width, height), interpolation=cv2.INTER_NEAREST) 49 | 50 | net.forward() 51 | feature_maps = net.blobs[FEATURE_MAPS].data[0] 52 | fc_params = net.params[FC_LAYER] 53 | fc_w = fc_params[0].data[porno] 54 | 55 | activation_map = np.zeros_like(feature_maps[0]) 56 | for feature_map, w in zip(feature_maps, fc_w): 57 | activation_map += feature_map * w 58 | 59 | activation_map = cv2.resize(activation_map, (width, height), interpolation=cv2.INTER_CUBIC) 60 | activation_map -= activation_map.min() 61 | activation_map /= activation_map.max() 62 | mask = np.zeros(activation_map.shape) 63 | mask[activation_map > mask_th] = 1 64 | image_with_mosaic = np.copy(image) 65 | image_with_mosaic[mask > mask_th] = mosaic_image[mask > mask_th] 66 | 67 | output_filepath = os.sep.join([output_dir, filename]) 68 | cv2.imwrite(output_filepath, image_with_mosaic) 69 | 70 | if (i+1) % 100 == 0: 71 | print('{} images processed!'.format(i+1)) 72 | 73 | # uncomment the following for visualization 74 | #vis_img = np.hstack([image, image_with_mosaic]) 75 | #cv2.imshow('Mosaic Visualization', vis_img) 76 | #cv2.waitKey() 77 | 78 | print('Done!') 79 | 80 | -------------------------------------------------------------------------------- /random_bonus/great-circle-interp/README.md: -------------------------------------------------------------------------------- 1 | ## Interpolating Latent Space of GAN with Great Circle 2 | 3 | If you can read Chinese, please refer to [行走在GAN的Latent Space (Walking in the Latent Space of GAN)](https://zhuanlan.zhihu.com/p/32135185) 4 | 5 | The pretrained model for generating comic avatars can be downloaded at: https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/random_bonus/great-circle-interp/netG_epoch_49.pth -------------------------------------------------------------------------------- /random_bonus/great-circle-interp/distance-experiment.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from matplotlib import pyplot 3 | 4 | 5 | def dist_o2l(p1, p2): 6 | # distance from origin to the line defined by (p1, p2) 7 | p12 = p2 - p1 8 | u12 = p12 / numpy.linalg.norm(p12) 9 | l_pp = numpy.dot(-p1, u12) 10 | pp = l_pp*u12 + p1 11 | return numpy.linalg.norm(pp) 12 | 13 | dim = 100 14 | N = 100000 15 | 16 | rvs = [] 17 | dists2l = [] 18 | for i in range(N): 19 | u = numpy.random.randn(dim) 20 | v = numpy.random.randn(dim) 21 | rvs.extend([u, v]) 22 | dists2l.append(dist_o2l(u, v)) 23 | 24 | dists = [numpy.linalg.norm(x) for x in rvs] 25 | 26 | print('Distances to samples, mean: {}, std: {}'.format(numpy.mean(dists), numpy.std(dists))) 27 | print('Distances to lines, mean: {}, std: {}'.format(numpy.mean(dists2l), numpy.std(dists2l))) 28 | 29 | fig, (ax0, ax1) = pyplot.subplots(ncols=2, figsize=(11, 5)) 30 | ax0.hist(dists, 100, normed=1, color='g') 31 | ax1.hist(dists2l, 100, normed=1, color='b') 32 | pyplot.show() 33 | -------------------------------------------------------------------------------- /random_bonus/great-circle-interp/latent-walk-great-circle.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import os 4 | import numpy 5 | from scipy.stats import chi 6 | import
torch.utils.data 7 | from torch.autograd import Variable 8 | from networks import NetG 9 | from PIL import Image 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector') 13 | parser.add_argument('--niter', type=int, default=10, help='how many paths') 14 | parser.add_argument('--n_steps', type=int, default=23, help='steps to walk') 15 | parser.add_argument('--ngf', type=int, default=64) 16 | parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use') 17 | parser.add_argument('--netG', default='netG_epoch_49.pth', help="trained params for G") 18 | 19 | opt = parser.parse_args() 20 | output_dir = 'gcircle-walk' 21 | os.system('mkdir -p {}'.format(output_dir)) 22 | print(opt) 23 | 24 | ngpu = int(opt.ngpu) 25 | nz = int(opt.nz) 26 | ngf = int(opt.ngf) 27 | nc = 3 28 | 29 | netG = NetG(ngf, nz, nc, ngpu) 30 | netG.load_state_dict(torch.load(opt.netG, map_location=lambda storage, loc: storage)) 31 | netG.eval() 32 | print(netG) 33 | 34 | for j in range(opt.niter): 35 | # step 1: sample the radius from the chi distribution with nz degrees of freedom 36 | r = chi.rvs(df=nz) 37 | 38 | # step 2: build an orthonormal pair (u, v) spanning a random 2D plane 39 | u = numpy.random.normal(0, 1, nz) 40 | w = numpy.random.normal(0, 1, nz) 41 | u /= numpy.linalg.norm(u) 42 | w /= numpy.linalg.norm(w) 43 | 44 | v = w - numpy.dot(u, w) * u 45 | v /= numpy.linalg.norm(v) 46 | 47 | ndimgs = [] 48 | for i in range(opt.n_steps): 49 | t = float(i) / float(opt.n_steps) 50 | # step 3: walk the great circle of radius r in that plane 51 | z = numpy.cos(t * 2 * numpy.pi) * u + numpy.sin(t * 2 * numpy.pi) * v 52 | z *= r 53 | 54 | noise_t = z.reshape((1, nz, 1, 1)) 55 | noise_t = torch.FloatTensor(noise_t) 56 | noisev = Variable(noise_t) 57 | fake = netG(noisev) 58 | timg = fake[0] 59 | timg = timg.data 60 | 61 | timg.add_(1).div_(2) 62 | ndimg = timg.mul(255).clamp(0, 255).byte().permute(1, 2, 0).numpy() 63 | ndimgs.append(ndimg) 64 | 65 | print('exporting {} ...'.format(j)) 66 | ndimg = numpy.hstack(ndimgs) 67 | 68 | im = Image.fromarray(ndimg) 69 | filename = os.sep.join([output_dir, 'gc-{:0>6d}.png'.format(j)]) 70 | im.save(filename) 71 | -------------------------------------------------------------------------------- /random_bonus/great-circle-interp/latent-walk-slerp-vs-lerp.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import numpy 4 | import torch.utils.data 5 | from torch.autograd import Variable 6 | import networks 7 | from PIL import Image 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector') 11 | parser.add_argument('--n_samples', type=int, default=10, help='how many images') 12 | parser.add_argument('--n_steps', type=int, default=11, help='steps for interpolation') 13 | parser.add_argument('--ngf', type=int, default=64) 14 | parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use') 15 | parser.add_argument('--netG', default='netG_epoch_49.pth', help="path to netG") 16 | 17 | opt = parser.parse_args() 18 | print(opt) 19 | 20 | ngpu = int(opt.ngpu) 21 | nz = int(opt.nz) 22 | ngf = int(opt.ngf) 23 | nc = 3 24 | 25 | netG = networks.NetG(ngf, nz, nc, ngpu) 26 | netG.eval() 27 | netG.load_state_dict(torch.load(opt.netG, map_location=lambda storage, loc: storage)) 28 | print(netG) 29 | 30 | n_steps = opt.n_steps 31 | for epoch in range(opt.n_samples): 32 | u = numpy.random.randn(nz) 33 | v = numpy.random.randn(nz) 34 | lu = numpy.linalg.norm(u) 35 | lv = numpy.linalg.norm(v) 36 | theta =
numpy.arccos(numpy.dot(u, v)/lu/lv) 37 | 38 | ndimgs_slerp = [] 39 | ndimgs_lerp = [] 40 | for i in range(n_steps+1): 41 | t = float(i) / float(n_steps) 42 | 43 | # slerp 44 | z_slerp = numpy.sin((1 - t) * theta) / numpy.sin(theta) * u + numpy.sin(t * theta) / numpy.sin(theta) * v 45 | 46 | noise_t = z_slerp.reshape((1, nz, 1, 1)) 47 | noise_t = torch.FloatTensor(noise_t) 48 | noisev = Variable(noise_t) 49 | fake = netG(noisev) 50 | timg = fake[0] 51 | timg = timg.data 52 | 53 | timg.add_(1).div_(2) 54 | ndimg = timg.mul(255).clamp(0, 255).byte().permute(1, 2, 0).numpy() 55 | ndimgs_slerp.append(ndimg) 56 | 57 | # lerp 58 | z_lerp = (1 - t) * u + t * v 59 | 60 | noise_t = z_lerp.reshape((1, nz, 1, 1)) 61 | noise_t = torch.FloatTensor(noise_t) 62 | noisev = Variable(noise_t) 63 | fake = netG(noisev) 64 | timg = fake[0] 65 | timg = timg.data 66 | 67 | timg.add_(1).div_(2) 68 | ndimg = timg.mul(255).clamp(0, 255).byte().permute(1, 2, 0).numpy() 69 | ndimgs_lerp.append(ndimg) 70 | 71 | print('exporting {} ...'.format(epoch)) 72 | 73 | # export slerp result 74 | ndimg = numpy.hstack(ndimgs_slerp) 75 | im = Image.fromarray(ndimg) 76 | im.save('e{:0>3d}-slerp.png'.format(epoch)) 77 | 78 | # export lerp result 79 | ndimg = numpy.hstack(ndimgs_lerp) 80 | im = Image.fromarray(ndimg) 81 | im.save('e{:0>3d}-lerp.png'.format(epoch)) 82 | -------------------------------------------------------------------------------- /random_bonus/great-circle-interp/networks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.parallel 4 | import torch.backends.cudnn as cudnn 5 | 6 | 7 | class NetG(nn.Module): 8 | def __init__(self, ngf, nz, nc, ngpu): 9 | super(NetG, self).__init__() 10 | self.ngpu = ngpu 11 | self.main = nn.Sequential( 12 | # input is Z, going into a convolution 13 | nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False), 14 | nn.BatchNorm2d(ngf * 8), 15 | nn.ReLU(True), 16 | # state size. (ngf*8) x 4 x 4 17 | nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), 18 | nn.BatchNorm2d(ngf * 4), 19 | nn.ReLU(True), 20 | # state size. (ngf*4) x 8 x 8 21 | nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), 22 | nn.BatchNorm2d(ngf * 2), 23 | nn.ReLU(True), 24 | # state size. (ngf*2) x 16 x 16 25 | nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), 26 | nn.BatchNorm2d(ngf), 27 | nn.ReLU(True), 28 | # state size. (ngf) x 32 x 32 29 | nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False), 30 | nn.Tanh() 31 | # state size. (nc) x 64 x 64 32 | ) 33 | 34 | def forward(self, input): 35 | if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1: 36 | output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) 37 | else: 38 | output = self.main(input) 39 | return output 40 | 41 | 42 | class NetD(nn.Module): 43 | def __init__(self, ndf, nc, ngpu): 44 | super(NetD, self).__init__() 45 | self.ngpu = ngpu 46 | self.main = nn.Sequential( 47 | # input is (nc) x 64 x 64 48 | nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), 49 | nn.LeakyReLU(0.2, inplace=True), 50 | # state size. (ndf) x 32 x 32 51 | nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), 52 | nn.BatchNorm2d(ndf * 2), 53 | nn.LeakyReLU(0.2, inplace=True), 54 | # state size. (ndf*2) x 16 x 16 55 | nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), 56 | nn.BatchNorm2d(ndf * 4), 57 | nn.LeakyReLU(0.2, inplace=True), 58 | # state size. 
(ndf*4) x 8 x 8 59 | nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), 60 | nn.BatchNorm2d(ndf * 8), 61 | nn.LeakyReLU(0.2, inplace=True), 62 | # state size. (ndf*8) x 4 x 4 63 | nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), 64 | nn.Sigmoid() 65 | ) 66 | 67 | def forward(self, input): 68 | if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1: 69 | output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) 70 | else: 71 | output = self.main(input) 72 | 73 | return output.view(-1, 1).squeeze(1) 74 | -------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/README.md: -------------------------------------------------------------------------------- 1 | ## Image Segmentation with Simplified & Customizable U-Net & TriangleNet in PyTorch 2 | 3 | ### Step 1 4 | In the root folder, create "train" and "val" folders, each containing one folder with the images and one with their segmentations (in a lossless format, e.g. PNG). The default folder names are "images" for images and "segmentations" for segmentations; both can be overridden in the config file (see example.cfg). 5 | 6 | ### Step 2 7 | > python main.py train [path/to/root_folder] --config [path/to/configfile] 8 | 9 | ## TriangleNet 10 | TriangleNet compares predictions and labels at multiple scales during training; refer to networks.py for more details. -------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/argparser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | 5 | def parse_param_file(filepath): 6 | with open(filepath, 'r') as f: 7 | kw_exprs = [x.strip() for x in f.readlines() if x.strip()] 8 | return eval('dict({})'.format(','.join(kw_exprs))) 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Simple Demo of Image Segmentation with U-Net', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | 16 | # general options 17 | parser.add_argument('mode', 18 | help='train/test') 19 | parser.add_argument('dataroot', 20 | help='Directory containing training images in "images" and "segmentations" or test images') 21 | parser.add_argument('config', 22 | help='Path to config file') 23 | parser.add_argument('--cpu', 24 | help='Set to CPU mode', action='store_true') 25 | parser.add_argument('--output-dir', 26 | help='Directory of output for both train/test', 27 | type=str, default='') 28 | 29 | # test options 30 | parser.add_argument('--model', 31 | help='Path to pre-trained model', 32 | type=str, default='') 33 | 34 | args = parser.parse_args() 35 | 36 | params = { 37 | # general params 38 | 'network': 'triangle', 39 | 'layers': [32, 64, 128, 256, 512], 40 | 'groups': 1, 41 | 'color_labels': [], 42 | 'image_width': None, 43 | 'image_height': None 44 | } 45 | 46 | kwargs = parse_param_file(args.config) 47 | 48 | # other params specified in config file 49 | if args.mode == 'train': 50 | 51 | # default: no augmentation, with batch-norm 52 | 53 | train_params = { 54 | # training params 55 | 'optimizer': 'SGD', 56 | 'lr_policy': {0: 1e-4}, 57 | 'momentum': 0.9, 58 | 'nesterov': True, 59 | 'batch_norm': True, 60 | 'batch_size': 4, 61 | 'val_batch_size': None, 62 | 'epochs': 24, 63 | 'print_interval': 50, 64 | 'validation_interval': 1000, 65 | 'checkpoint_interval': 10000, 66 | 'random_horizontal_flip': False, 67 | 'random_square_crop': False, 68 | 'random_crop': None, # example: (0.81, 0.1) uses 0.81 as the area ratio and 0.1 as the h/w ratio variation 69 |
'random_rotation': 0, 70 | 'img_dir': 'images', 71 | 'seg_dir': 'segmentations', 72 | 'regression': False, 73 | } 74 | 75 | params.update(train_params) 76 | if params['val_batch_size'] is None: 77 | params['val_batch_size'] = params['batch_size'] 78 | 79 | # update params from config 80 | for k, v in kwargs.items(): 81 | if k in params: 82 | params[k] = v 83 | 84 | # set params to args 85 | for k, v in params.items(): 86 | setattr(args, k, v) 87 | 88 | args.dataroot = args.dataroot.rstrip(os.sep) 89 | 90 | return args 91 | -------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/example.cfg: -------------------------------------------------------------------------------- 1 | layers=[32, 64, 128, 256, 512] 2 | lr_policy={0: 1, 3: 5e-1} 3 | optimizer='Adadelta' 4 | batch_size=4 5 | epochs=100 6 | color_labels=[(i, i, i) for i in range(18)] 7 | image_width=256 8 | image_height=384 9 | random_horizontal_flip=True 10 | random_square_crop=False 11 | random_crop=(0.85, 0.1) 12 | random_rotation=5 13 | validation_interval=5000 14 | checkpoint_interval=10000 15 | print_interval=100 16 | seg_dir='profiles' 17 | -------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/loss_visualizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy 4 | from matplotlib import pyplot 5 | 6 | LOG_FILENAME = 'log.txt' 7 | TRAIN_LOSS_KEYWORD = '| Training loss: ' 8 | VAL_LOSS_KEYWORD = '| Validation loss: ' 9 | ITER_INDEX = 4 10 | LOSS_INDEX = -1 11 | MIOU_INDEX = -5 12 | MPA_INDEX = -9 13 | 14 | 15 | def parse_log(filepath): 16 | with open(filepath, 'r') as f: 17 | train_curve = [] 18 | val_curve = [] 19 | line = f.readline() 20 | while line: 21 | if TRAIN_LOSS_KEYWORD in line or VAL_LOSS_KEYWORD in line: 22 | tokens = line.split() 23 | measure = [int(tokens[ITER_INDEX]), float(tokens[LOSS_INDEX])] 24 | if TRAIN_LOSS_KEYWORD in line: 25 | train_curve.append(measure) 26 | else: 27 | measure.extend([float(tokens[MPA_INDEX]), float(tokens[MIOU_INDEX])]) 28 | val_curve.append(measure) 29 | 30 | line = f.readline() 31 | return train_curve, val_curve 32 | 33 | root_dir = sys.argv[1].rstrip(os.sep) 34 | keyword = sys.argv[2] if len(sys.argv) > 2 else None 35 | 36 | groups = [x for x in os.listdir(root_dir) if os.path.isdir(os.sep.join([root_dir, x])) and (keyword is None or keyword in x)] # os.listdir() returns bare names, so join with root_dir before the isdir() check 37 | 38 | for group in groups: 39 | log_path = os.sep.join([root_dir, group, LOG_FILENAME]) 40 | train_loss, val_loss = parse_log(log_path) 41 | train_loss = numpy.array(train_loss) 42 | val_loss = numpy.array(val_loss) 43 | pyplot.figure('Train/Test Loss Curves') 44 | pyplot.plot(train_loss[:, 0], train_loss[:, 1], label=group) 45 | pyplot.plot(val_loss[:, 0], val_loss[:, 1], '--', label=group) 46 | pyplot.figure('mPA/mIOU Curves') 47 | pyplot.plot(val_loss[:, 0], val_loss[:, 2], label='{}-mPA'.format(group)) 48 | pyplot.plot(val_loss[:, 0], val_loss[:, 3], '--', label='{}-mIOU'.format(group)) 49 | 50 | pyplot.figure('Train/Test Loss Curves') 51 | pyplot.legend(loc='upper right') 52 | pyplot.figure('mPA/mIOU Curves') 53 | pyplot.legend(loc='lower right') 54 | pyplot.show() 55 | -------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/networks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3
| import torch.nn.functional as F 4 | from torchvision.models import resnet 5 | from torchvision.models.resnet import conv3x3 6 | 7 | 8 | class UNetConvBlock(nn.Module): 9 | def __init__(self, input_nch, output_nch, kernel_size=3, activation=F.leaky_relu, use_bn=True, same_conv=True): 10 | super(UNetConvBlock, self).__init__() 11 | padding = kernel_size // 2 if same_conv else 0 # only support odd kernel 12 | self.conv0 = nn.Conv2d(input_nch, output_nch, kernel_size, padding=padding) 13 | self.conv1 = nn.Conv2d(output_nch, output_nch, kernel_size, padding=padding) 14 | self.act = activation 15 | self.batch_norm = nn.BatchNorm2d(output_nch) if use_bn else None 16 | 17 | def forward(self, x): 18 | x = self.conv0(x) 19 | if self.batch_norm: 20 | x = self.batch_norm(x) 21 | x = self.act(x) 22 | x = self.conv1(x) 23 | if self.batch_norm: 24 | x = self.batch_norm(x) 25 | return self.act(x) 26 | 27 | 28 | class UNet(nn.Module): 29 | def __init__(self, conv_channels, input_nch=3, output_nch=2, use_bn=True): 30 | super(UNet, self).__init__() 31 | self.n_stages = len(conv_channels) 32 | # define convolution blocks 33 | down_convs = [] 34 | up_convs = [] 35 | 36 | self.max_pooling = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 37 | 38 | in_nch = input_nch 39 | for i, out_nch in enumerate(conv_channels): 40 | down_convs.append(UNetConvBlock(in_nch, out_nch, use_bn=use_bn)) 41 | up_conv_in_ch = 2 * out_nch if i < self.n_stages - 1 else out_nch # first up conv with equal channels 42 | up_conv_out_ch = out_nch if i == 0 else in_nch # last up conv with channels equal to labels 43 | up_convs.insert(0, UNetConvBlock(up_conv_in_ch, up_conv_out_ch, use_bn=use_bn)) 44 | in_nch = out_nch 45 | 46 | self.down_convs = nn.ModuleList(down_convs) 47 | self.up_convs = nn.ModuleList(up_convs) 48 | 49 | # define output convolution 50 | self.out_conv = nn.Conv2d(conv_channels[0], output_nch, 1) 51 | 52 | def forward(self, x): 53 | # conv & downsampling 54 | down_sampled_fmaps = [] 55 | for i in range(self.n_stages-1): 56 | x = self.down_convs[i](x) 57 | x = self.max_pooling(x) 58 | down_sampled_fmaps.insert(0, x) 59 | 60 | # center convs 61 | x = self.down_convs[self.n_stages-1](x) 62 | x = self.up_convs[0](x) 63 | 64 | # conv & upsampling 65 | for i, down_sampled_fmap in enumerate(down_sampled_fmaps): 66 | x = torch.cat([x, down_sampled_fmap], 1) 67 | x = self.up_convs[i+1](x) 68 | x = F.upsample(x, scale_factor=2, mode='bilinear') 69 | 70 | return self.out_conv(x) 71 | #x = self.out_conv(x) 72 | #return x if self.out_conv.out_channels == 1 else F.relu(x) 73 | 74 | 75 | class BasicResBlock(nn.Module): 76 | 77 | def __init__(self, input_nch, output_nch, groups=1): 78 | super(BasicResBlock, self).__init__() 79 | self.transform_conv = nn.Conv2d(input_nch, output_nch, 1) 80 | self.bn1 = nn.BatchNorm2d(output_nch) 81 | self.conv1 = nn.Conv2d(output_nch, output_nch, 3, padding=1, groups=groups, bias=False) 82 | self.bn2 = nn.BatchNorm2d(output_nch) 83 | self.conv2 = nn.Conv2d(output_nch, output_nch, 3, padding=1, groups=groups, bias=False) 84 | self.act = nn.LeakyReLU(inplace=True) 85 | 86 | def forward(self, x): 87 | x = self.transform_conv(x) 88 | residual = x 89 | 90 | out = self.bn1(x) 91 | out = self.act(out) 92 | out = self.conv1(out) 93 | 94 | out = self.bn2(out) 95 | out = self.act(out) 96 | out = self.conv2(out) 97 | 98 | out += residual 99 | 100 | return out 101 | 102 | 103 | class TriangleNet(nn.Module): 104 | def __init__(self, conv_channels, input_nch, output_nch, groups=1): 105 | super(TriangleNet, 
self).__init__() 106 | self.input_nch = input_nch 107 | self.output_nch = output_nch 108 | self.pyramid_height = len(conv_channels) 109 | 110 | blocks = [list() for _ in range(self.pyramid_height)] 111 | for i in range(self.pyramid_height): 112 | for j in range(i, self.pyramid_height): 113 | if i == 0 and j == 0: 114 | blocks[i].append(BasicResBlock(input_nch, conv_channels[j], groups=groups)) 115 | else: 116 | blocks[i].append(BasicResBlock(conv_channels[j-1], conv_channels[j], groups=groups)) 117 | 118 | for i in range(self.pyramid_height): 119 | blocks[i] = nn.ModuleList(blocks[i]) 120 | self.blocks = nn.ModuleList(blocks) 121 | 122 | self.down_sample = nn.MaxPool2d(3, 2, 1) 123 | self.up_samples = nn.ModuleList([nn.Upsample(scale_factor=2**i, mode='bilinear') for i in range(1, self.pyramid_height)]) 124 | 125 | self.channel_out_convs = nn.ModuleList([nn.Conv2d(conv_channels[-1], output_nch, 1) for _ in range(self.pyramid_height)]) 126 | self.out_conv = nn.Conv2d(self.pyramid_height * conv_channels[-1], output_nch, 1) 127 | 128 | def forward(self, x): 129 | # forward & expand 130 | x = [self.blocks[0][0](x)] 131 | for i in range(1, self.pyramid_height): 132 | x.append(self.down_sample(x[-1])) 133 | for j in range(i+1): 134 | x[j] = self.blocks[j][i-j](x[j]) 135 | 136 | # upsampling & conv 137 | if self.training: 138 | ms_out = [self.channel_out_convs[i](x[i]) for i in range(self.pyramid_height)] 139 | x = [x[0]] + [self.up_samples[i-1](x[i]) for i in range(1, self.pyramid_height)] 140 | 141 | # final 1x1 conv 142 | out = self.out_conv(torch.cat(x, 1)) 143 | return [out] + ms_out if self.training else out 144 | 145 | 146 | class PSPTriangleNet(nn.Module): 147 | def __init__(self, conv_channels, input_nch, output_nch, groups): 148 | super(PSPTriangleNet, self).__init__() 149 | self.input_nch = input_nch 150 | self.output_nch = output_nch 151 | self.pyramid_height = len(conv_channels) 152 | 153 | blocks = [] 154 | for i in range(self.pyramid_height-1): 155 | if i == 0: 156 | blocks.append(BasicResBlock(input_nch, conv_channels[i], groups=groups)) 157 | else: 158 | blocks.append(BasicResBlock(conv_channels[i-1], conv_channels[i], groups=groups)) 159 | 160 | ms_blocks = [] 161 | for i in range(self.pyramid_height): 162 | ms_blocks.append(BasicResBlock(conv_channels[-2], conv_channels[-1]//self.pyramid_height)) 163 | self.blocks = nn.ModuleList(blocks) 164 | self.ms_blocks = nn.ModuleList(ms_blocks) 165 | 166 | self.down_samples = nn.ModuleList([nn.MaxPool2d(2**i+1, 2**i, 2**(i-1)) for i in range(1, self.pyramid_height)]) 167 | self.up_samples = nn.ModuleList([nn.Upsample(scale_factor=2**i, mode='bilinear') for i in range(1, self.pyramid_height)]) 168 | 169 | self.channel_out_convs = nn.ModuleList([nn.Conv2d(conv_channels[-1]//self.pyramid_height, output_nch, 1) for _ in range(self.pyramid_height)]) 170 | self.out_conv = nn.Conv2d(conv_channels[-1], output_nch, 1) 171 | 172 | def forward(self, x): 173 | # forward & expand 174 | for i in range(self.pyramid_height-1): 175 | x = self.blocks[i](x) 176 | x = [self.ms_blocks[0](x)] + [self.down_samples[i-1](self.ms_blocks[i](x)) for i in range(1, self.pyramid_height)] # pair ms_blocks[i] with down_samples[i-1] so each pyramid branch gets its own block; previously ms_blocks[0] was reused and the last block never used 177 | 178 | # upsampling & conv 179 | if self.training: 180 | ms_out = [self.channel_out_convs[i](x[i]) for i in range(self.pyramid_height)] 181 | x = [x[0]] + [self.up_samples[i-1](x[i]) for i in range(1, self.pyramid_height)] 182 | 183 | # final 1x1 conv 184 | out = self.out_conv(torch.cat(x, 1)) 185 | return [out] + ms_out if self.training else out 186 | 187 |
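188 | 189 | # A minimal smoke-test sketch: build a U-Net with the channel configuration from example.cfg and check the output shape on a dummy batch (assumes a 3-channel input whose H and W are divisible by 2**(len(channels)-1)). 190 | if __name__ == '__main__': 191 | from torch.autograd import Variable 192 | net = UNet([32, 64, 128, 256, 512], input_nch=3, output_nch=2) 193 | net.eval() 194 | dummy = Variable(torch.randn(1, 3, 384, 256)) 195 | out = net(dummy) 196 | print(out.size()) # expected: torch.Size([1, 2, 384, 256])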
-------------------------------------------------------------------------------- /random_bonus/image-segmentation(updating)/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import random 3 | import numpy 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torchvision 8 | from torchvision.datasets.folder import * 9 | from torch.optim import SGD, Adadelta, Adam, Adagrad, RMSprop, ASGD 10 | import cv2 11 | 12 | OPTIMIZERS = { 13 | 'sgd': SGD, 14 | 'adadelta': Adadelta, 15 | 'adam': Adam, 16 | 'adagrad': Adagrad, 17 | 'rmsprop': RMSprop, 18 | 'asgd': ASGD 19 | } 20 | 21 | 22 | class SegmentationImageFolder(ImageFolder): 23 | """A simplified segmentation data loader where the images are arranged in this way: :: 24 | 25 | root/images/001.png 26 | root/images/002.png 27 | root/images/003.png 28 | 29 | root/segmentations/001.png 30 | root/segmentations/002.png 31 | root/segmentations/003.png 32 | 33 | images in the two folders must correspond pairwise when sorted by name 34 | 35 | Args: 36 | please refer to 37 | https://github.com/frombeijingwithlove/dlcv_for_beginners/blob/master/chap6/data_augmentation/image_augmentation.py 38 | """ 39 | 40 | def __init__(self, root, 41 | image_folder='images', segmentation_folder='segmentations', 42 | labels=[(0, 0, 0), (255, 255, 255)], 43 | image_size=None, 44 | random_horizontal_flip=False, 45 | random_rotation=0, 46 | random_crop=None, 47 | random_square_crop=False, 48 | loader=default_loader, 49 | label_regr=False, 50 | multi_scale=0): 51 | super(SegmentationImageFolder, self).__init__(root, loader=loader) 52 | pair_len = len(self.imgs) // 2 53 | assert image_folder in self.classes and segmentation_folder in self.classes 54 | if image_folder < segmentation_folder: # ImageFolder sorts classes by name, so the folder-name order decides which half of self.imgs holds the images 55 | self.imgs = [(self.imgs[i][0], self.imgs[i+pair_len][0]) for i in range(pair_len)] 56 | else: 57 | self.imgs = [(self.imgs[i+pair_len][0], self.imgs[i][0]) for i in range(pair_len)] 58 | self.img_folder = image_folder 59 | self.seg_folder = segmentation_folder 60 | self.labels = [numpy.array(x, dtype=numpy.uint8) for x in labels] 61 | self.image_size = image_size 62 | self.flip_lr = random_horizontal_flip 63 | self.random_rotation = random_rotation 64 | self.random_crop = random_crop 65 | self.random_square_crop = random_square_crop 66 | self.label_regr = label_regr 67 | self.multi_scale = multi_scale 68 | 69 | def __getitem__(self, index): 70 | """ 71 | Args: 72 | index (int): Index 73 | 74 | Returns: 75 | tuple: (image, segmentation) where segmentation is the per-pixel label-index map.
76 | """ 77 | imgpath, segpath = self.imgs[index] 78 | img = self.loader(imgpath) 79 | seg = self.loader(segpath) 80 | 81 | # manually transform to incorporate horizontal flip & one-hot coding for segmentation labels 82 | if self.random_rotation: 83 | w, h = img.size 84 | angle = self.random_rotation % 360 85 | img = img.rotate(angle) 86 | seg = seg.rotate(angle) 87 | 88 | angle_crop = angle % 180 89 | if angle_crop > 90: 90 | angle_crop = 180 - angle_crop 91 | theta = angle_crop * numpy.pi / 180.0 92 | hw_ratio = float(h) / float(w) 93 | tan_theta = numpy.tan(theta) 94 | numerator = numpy.cos(theta) + numpy.sin(theta) * tan_theta 95 | r = hw_ratio if h > w else 1 / hw_ratio 96 | denominator = r * tan_theta + 1 97 | crop_mult = numerator / denominator 98 | w_crop = int(round(crop_mult * w)) 99 | h_crop = int(round(crop_mult * h)) 100 | x0 = int((w - w_crop) / 2) 101 | y0 = int((h - h_crop) / 2) 102 | 103 | img = img.crop((x0, y0, x0+w_crop, y0+h_crop)) 104 | seg = seg.crop((x0, y0, x0+w_crop, y0+h_crop)) 105 | 106 | if self.random_crop: 107 | area_ratio, hw_vari = self.random_crop 108 | w, h = img.size 109 | hw_delta = numpy.random.uniform(-hw_vari, hw_vari) 110 | hw_mult = 1 + hw_delta 111 | w_crop = int(round(w * numpy.sqrt(area_ratio * hw_mult))) 112 | if w_crop > w - 2: 113 | w_crop = w - 2 114 | h_crop = int(round(h * numpy.sqrt(area_ratio / hw_mult))) 115 | if h_crop > h - 2: 116 | h_crop = h - 2 117 | x0 = numpy.random.randint(0, w - w_crop - 1) 118 | y0 = numpy.random.randint(0, h - h_crop - 1) 119 | img = img.crop((x0, y0, x0+w_crop, y0+h_crop)) 120 | seg = seg.crop((x0, y0, x0+w_crop, y0+h_crop)) 121 | 122 | if self.random_square_crop: 123 | w, h = img.size 124 | if w > h: 125 | x0 = random.randint(0, w-h-1) 126 | img = img.crop((x0, 0, x0+h, h)) 127 | seg = seg.crop((x0, 0, x0+h, h)) 128 | elif w < h: 129 | y0 = random.randint(0, h-w-1) 130 | img = img.crop((0, y0, w, y0+w)) 131 | seg = seg.crop((0, y0, w, y0+w)) 132 | 133 | if self.image_size: 134 | img = img.resize(self.image_size) 135 | seg = seg.resize(self.image_size, Image.NEAREST) 136 | 137 | # random horizontal flip 138 | if random.random() > 0.5: 139 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 140 | seg = seg.transpose(Image.FLIP_LEFT_RIGHT) 141 | 142 | # one-hot coding for segmentation labels 143 | seg_arr = numpy.array(seg) 144 | seg = numpy.zeros(seg_arr.shape[:2], dtype=numpy.int64) 145 | for i, label_color in enumerate(self.labels): 146 | label_indices = numpy.where(seg_arr == label_color)[:2] 147 | seg[label_indices[0], label_indices[1]] = i 148 | 149 | if self.multi_scale: 150 | h, w = seg.shape 151 | seg = [seg] + [cv2.resize(seg, (w//(2**i), h//(2**i)), interpolation=cv2.INTER_NEAREST).astype(numpy.int64) for i in range(1, self.multi_scale)] 152 | 153 | # to tensor 154 | transform = torchvision.transforms.Compose([ 155 | torchvision.transforms.ToTensor(), 156 | torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 157 | ]) 158 | img = transform(img) 159 | if self.multi_scale: 160 | seg = [torch.Tensor(x) if self.label_regr else torch.LongTensor(x) for x in seg] 161 | else: 162 | seg = torch.Tensor(seg) if self.label_regr else torch.LongTensor(seg) 163 | 164 | return img, seg 165 | 166 | def __len__(self): 167 | return len(self.imgs) 168 | 169 | 170 | class CrossEntropyLoss2D(nn.Module): 171 | def __init__(self, size_average=True): 172 | super(CrossEntropyLoss2D, self).__init__() 173 | self.nll_loss_2d = nn.NLLLoss2d(size_average=size_average) 174 | 175 | def forward(self, outputs, targets): 176 | 
return self.nll_loss_2d(F.log_softmax(outputs), targets) 177 | 178 | 179 | class MSCrossEntropyLoss2D(nn.Module): 180 | def __init__(self, weights, size_average=True): 181 | super(MSCrossEntropyLoss2D, self).__init__() 182 | self.nll_loss_2d = nn.NLLLoss2d(size_average=size_average) 183 | self.weights = weights 184 | 185 | def forward(self, outputs, targets): 186 | loss = self.weights[0] * self.nll_loss_2d(F.log_softmax(outputs[0]), targets[0]) 187 | for i in range(len(self.weights)-1): 188 | loss += self.weights[i+1] * self.nll_loss_2d(F.log_softmax(outputs[i+1]), targets[i]) 189 | return loss 190 | 191 | 192 | def get_datetime_string(): 193 | datetime_now = datetime.now() 194 | return '{}-{}-{}-{}-{}-{}'.format( 195 | datetime_now.year, 196 | datetime_now.month, 197 | datetime_now.day, 198 | datetime_now.hour, 199 | datetime_now.minute, 200 | datetime_now.second 201 | ) 202 | 203 | 204 | # borrowed from 205 | # https://github.com/pytorch/examples/tree/master/imagenet 206 | class AverageMeter(object): 207 | """Computes and stores the average and current value""" 208 | def __init__(self): 209 | self.val = 0 210 | self.avg = 0 211 | self.sum = 0 212 | self.count = 0 213 | 214 | def update(self, val, n=1): 215 | self.val = val 216 | self.sum += val * n 217 | self.count += n 218 | self.avg = self.sum / self.count 219 | 220 | 221 | def get_optimizer(name, model_params, **kwargs): 222 | name = name.lower() 223 | if name == 'sgd': 224 | optimizer = OPTIMIZERS[name]( 225 | model_params, 226 | lr=kwargs['lr'], 227 | momentum=kwargs['momentum'], 228 | nesterov=kwargs['nesterov'] 229 | ) 230 | elif name in ['adadelta', 'adam', 'adagrad', 'asgd']: 231 | optimizer = OPTIMIZERS[name](model_params, lr=kwargs['lr']) 232 | elif name == 'rmsprop': 233 | optimizer = OPTIMIZERS[name]( 234 | model_params, 235 | lr=kwargs['lr'], 236 | momentum=kwargs['momentum'], 237 | ) 238 | else: 239 | raise ValueError('Unsupported optimizer: {}'.format(name)) 240 | 241 | return optimizer 242 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/README.md: -------------------------------------------------------------------------------- 1 | ## Model Fusion in Caffe 2 | Blog: 3 | [Implementing Model Fusion in Caffe](http://www.cnblogs.com/frombeijingwithlove/p/6683476.html) 4 | 5 | ## Train Two Different Models from Scratch 6 | ### step 1 7 | > ./download_mnist.sh 8 | 9 | ### step 2 10 | > python convert_mnist.py 11 | 12 | ### step 3 13 | > python gen_img_list.py 14 | 15 | ### step 4 16 | 17 | Train with lenet_odd_solver.prototxt & lenet_even_solver.prototxt 18 | 19 | ## Fuse Pre-trained Models Directly 20 | Download the pre-trained models from: 21 | https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/mnist_lenet_even_iter_30000.caffemodel 22 | https://github.com/frombeijingwithlove/dlcv_book_pretrained_caffe_models/blob/master/mnist_lenet_odd_iter_30000.caffemodel 23 | 24 | ## Generate the Fused prototxt 25 | 26 | > python rename_n_freeze_layers.py input_model output_model prefix 27 | 28 | Copy the layer definitions of each renamed model, from just after the data layer down to the feature layer to be fused (e.g. ip1), into a single prototxt, then add a data layer at the top. The feature layers can be fused by concatenating them with a Concat layer, followed by fully connected layers or other operations; see lenet_fusion_train_val.prototxt for a complete example. 29 | 30 | ## Generate the Fused Weights 31 | > python fuse_model.py 32 | 33 | ## Fine-tune the Fused Model 34 | 35 | Just train it directly. -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/convert_mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle, gzip 3 | from matplotlib import pyplot 4 | 5 | # Load the dataset 6 | print('Loading data from mnist.pkl.gz ...') 7 |
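# note: mnist.pkl.gz was pickled under Python 2; if running this script under Python 3, the load below would need pickle.load(f, encoding='latin1')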
with gzip.open('mnist.pkl.gz', 'rb') as f: 8 | train_set, valid_set, test_set = pickle.load(f) 9 | 10 | imgs_dir = 'mnist' 11 | os.system('mkdir -p {}'.format(imgs_dir)) 12 | datasets = {'train': train_set, 'val': valid_set, 'test': test_set} 13 | for dataname, dataset in datasets.items(): 14 | print('Converting {} dataset ...'.format(dataname)) 15 | data_dir = os.sep.join([imgs_dir, dataname]) 16 | os.system('mkdir -p {}'.format(data_dir)) 17 | for i, (img, label) in enumerate(zip(*dataset)): 18 | filename = '{:0>6d}_{}.jpg'.format(i, label) 19 | filepath = os.sep.join([data_dir, filename]) 20 | img = img.reshape((28, 28)) 21 | pyplot.imsave(filepath, img, cmap='gray') 22 | if (i+1) % 10000 == 0: 23 | print('{} images converted!'.format(i+1)) 24 | 25 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/download_mnist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # wget http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz 3 | wget http://deeplearning.net/data/mnist/mnist.pkl.gz 4 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/fuse_model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/path/to/caffe/python') 3 | import caffe 4 | 5 | fusion_net = caffe.Net('lenet_fusion_train_val.prototxt', caffe.TEST) 6 | 7 | model_list = [ 8 | ('even', 'lenet_even_train_val.prototxt', 'mnist_lenet_even_iter_30000.caffemodel'), 9 | ('odd', 'lenet_odd_train_val.prototxt', 'mnist_lenet_odd_iter_30000.caffemodel') 10 | ] 11 | 12 | for prefix, model_def, model_weight in model_list: 13 | net = caffe.Net(model_def, model_weight, caffe.TEST) 14 | 15 | for layer_name, param in net.params.items(): # items() keeps this working under both Python 2 and 3 16 | n_params = len(param) 17 | try: 18 | for i in range(n_params): 19 | fusion_net.params['{}/{}'.format(prefix, layer_name)][i].data[...] = param[i].data[...]
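# each layer in the fusion net carries a model prefix (e.g. "even/conv1"), so the source weights are copied into the matching prefixed layer; source layers with no counterpart in the fusion net (e.g. the old ip2) raise a KeyError and are skipped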
20 | except Exception as e: 21 | print(e) 22 | 23 | fusion_net.save('init_fusion.caffemodel') 24 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/gen_img_list.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | mnist_path = 'mnist' 5 | data_sets = ['train', 'val'] 6 | 7 | for data_set in data_sets: 8 | odd_list = '{}_odd.txt'.format(data_set) 9 | even_list = '{}_even.txt'.format(data_set) 10 | all_list = '{}_all.txt'.format(data_set) 11 | root = os.sep.join([mnist_path, data_set]) 12 | filenames = os.listdir(root) 13 | with open(odd_list, 'w') as f_odd, open(even_list, 'w') as f_even, open(all_list, 'w') as f_all: 14 | for filename in filenames: 15 | filepath = os.sep.join([root, filename]) 16 | label = int(filename[:filename.rfind('.')].split('_')[1]) 17 | line = '{} {}\n'.format(filepath, label) 18 | f_all.write(line) 19 | 20 | line = '{} {}\n'.format(filepath, int(label/2)) 21 | if label % 2: 22 | f_odd.write(line) 23 | else: 24 | f_even.write(line) 25 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_even_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "lenet_even_train_val.prototxt" 2 | test_iter: 247 3 | test_initialization: false 4 | test_interval: 1000 5 | base_lr: 0.01 6 | momentum: 0.9 7 | weight_decay: 0.0005 8 | lr_policy: "step" 9 | gamma: 0.707 10 | stepsize: 1000 11 | display: 200 12 | max_iter: 30000 13 | snapshot: 30000 14 | snapshot_prefix: "mnist_lenet_even" 15 | solver_mode: GPU 16 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_even_train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "ImageData" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | image_data_param { 15 | source: "train_even.txt" 16 | is_color: false 17 | batch_size: 25 18 | } 19 | } 20 | layer { 21 | name: "mnist" 22 | type: "ImageData" 23 | top: "data" 24 | top: "label" 25 | include { 26 | phase: TEST 27 | } 28 | transform_param { 29 | mean_value: 128 30 | scale: 0.00390625 31 | } 32 | image_data_param { 33 | source: "val_even.txt" 34 | is_color: false 35 | batch_size: 20 36 | } 37 | } 38 | layer { 39 | name: "conv1" 40 | type: "Convolution" 41 | bottom: "data" 42 | top: "conv1" 43 | param { 44 | lr_mult: 1 45 | } 46 | param { 47 | lr_mult: 2 48 | } 49 | convolution_param { 50 | num_output: 20 51 | kernel_size: 5 52 | stride: 1 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 2 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "conv2" 74 | type: "Convolution" 75 | bottom: "pool1" 76 | top: "conv2" 77 | param { 78 | lr_mult: 1 79 | } 80 | param { 81 | lr_mult: 2 82 | } 83 | convolution_param { 84 | num_output: 50 85 | kernel_size: 5 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "conv2" 99 | 
top: "pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "ip1" 108 | type: "InnerProduct" 109 | bottom: "pool2" 110 | top: "ip1" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | inner_product_param { 118 | num_output: 500 119 | weight_filler { 120 | type: "xavier" 121 | } 122 | bias_filler { 123 | type: "constant" 124 | } 125 | } 126 | } 127 | layer { 128 | name: "relu1" 129 | type: "ReLU" 130 | bottom: "ip1" 131 | top: "ip1" 132 | } 133 | layer { 134 | name: "ip2" 135 | type: "InnerProduct" 136 | bottom: "ip1" 137 | top: "ip2" 138 | param { 139 | lr_mult: 1 140 | } 141 | param { 142 | lr_mult: 2 143 | } 144 | inner_product_param { 145 | num_output: 5 146 | weight_filler { 147 | type: "xavier" 148 | } 149 | bias_filler { 150 | type: "constant" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "accuracy" 156 | type: "Accuracy" 157 | bottom: "ip2" 158 | bottom: "label" 159 | top: "accuracy" 160 | include { 161 | phase: TEST 162 | } 163 | } 164 | layer { 165 | name: "loss" 166 | type: "SoftmaxWithLoss" 167 | bottom: "ip2" 168 | bottom: "label" 169 | top: "loss" 170 | } 171 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_fusion_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "lenet_fusion_train_val.prototxt" 2 | test_iter: 500 3 | test_initialization: false 4 | test_interval: 1000 5 | base_lr: 0.01 6 | momentum: 0.9 7 | weight_decay: 0.0005 8 | lr_policy: "step" 9 | gamma: 0.707 10 | stepsize: 1000 11 | display: 200 12 | max_iter: 30000 13 | snapshot: 30000 14 | snapshot_prefix: "mnist_lenet_fused" 15 | solver_mode: GPU 16 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_fusion_train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "ImageData" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | image_data_param { 15 | source: "train_all.txt" 16 | is_color: false 17 | batch_size: 50 18 | } 19 | } 20 | layer { 21 | name: "mnist" 22 | type: "ImageData" 23 | top: "data" 24 | top: "label" 25 | include { 26 | phase: TEST 27 | } 28 | transform_param { 29 | mean_value: 128 30 | scale: 0.00390625 31 | } 32 | image_data_param { 33 | source: "val_all.txt" 34 | is_color: false 35 | batch_size: 20 36 | } 37 | } 38 | layer { 39 | name: "odd/conv1" 40 | type: "Convolution" 41 | bottom: "data" 42 | top: "odd/conv1" 43 | param { 44 | lr_mult: 0 45 | } 46 | param { 47 | lr_mult: 0 48 | } 49 | convolution_param { 50 | num_output: 20 51 | kernel_size: 5 52 | stride: 1 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "odd/pool1" 63 | type: "Pooling" 64 | bottom: "odd/conv1" 65 | top: "odd/pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 2 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "odd/conv2" 74 | type: "Convolution" 75 | bottom: "odd/pool1" 76 | top: "odd/conv2" 77 | param { 78 | lr_mult: 0 79 | } 80 | param { 81 | lr_mult: 0 82 | } 83 | convolution_param { 84 | num_output: 50 85 | kernel_size: 5 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | 
type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "odd/pool2" 97 | type: "Pooling" 98 | bottom: "odd/conv2" 99 | top: "odd/pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "odd/ip1" 108 | type: "InnerProduct" 109 | bottom: "odd/pool2" 110 | top: "odd/ip1" 111 | param { 112 | lr_mult: 0 113 | } 114 | param { 115 | lr_mult: 0 116 | } 117 | inner_product_param { 118 | num_output: 500 119 | weight_filler { 120 | type: "xavier" 121 | } 122 | bias_filler { 123 | type: "constant" 124 | } 125 | } 126 | } 127 | layer { 128 | name: "odd/relu1" 129 | type: "ReLU" 130 | bottom: "odd/ip1" 131 | top: "odd/ip1" 132 | } 133 | layer { 134 | name: "even/conv1" 135 | type: "Convolution" 136 | bottom: "data" 137 | top: "even/conv1" 138 | param { 139 | lr_mult: 0 140 | } 141 | param { 142 | lr_mult: 0 143 | } 144 | convolution_param { 145 | num_output: 20 146 | kernel_size: 5 147 | stride: 1 148 | weight_filler { 149 | type: "xavier" 150 | } 151 | bias_filler { 152 | type: "constant" 153 | } 154 | } 155 | } 156 | layer { 157 | name: "even/pool1" 158 | type: "Pooling" 159 | bottom: "even/conv1" 160 | top: "even/pool1" 161 | pooling_param { 162 | pool: MAX 163 | kernel_size: 2 164 | stride: 2 165 | } 166 | } 167 | layer { 168 | name: "even/conv2" 169 | type: "Convolution" 170 | bottom: "even/pool1" 171 | top: "even/conv2" 172 | param { 173 | lr_mult: 0 174 | } 175 | param { 176 | lr_mult: 0 177 | } 178 | convolution_param { 179 | num_output: 50 180 | kernel_size: 5 181 | stride: 1 182 | weight_filler { 183 | type: "xavier" 184 | } 185 | bias_filler { 186 | type: "constant" 187 | } 188 | } 189 | } 190 | layer { 191 | name: "even/pool2" 192 | type: "Pooling" 193 | bottom: "even/conv2" 194 | top: "even/pool2" 195 | pooling_param { 196 | pool: MAX 197 | kernel_size: 2 198 | stride: 2 199 | } 200 | } 201 | layer { 202 | name: "even/ip1" 203 | type: "InnerProduct" 204 | bottom: "even/pool2" 205 | top: "even/ip1" 206 | param { 207 | lr_mult: 0 208 | } 209 | param { 210 | lr_mult: 0 211 | } 212 | inner_product_param { 213 | num_output: 500 214 | weight_filler { 215 | type: "xavier" 216 | } 217 | bias_filler { 218 | type: "constant" 219 | } 220 | } 221 | } 222 | layer { 223 | name: "even/relu1" 224 | type: "ReLU" 225 | bottom: "even/ip1" 226 | top: "even/ip1" 227 | } 228 | layer { 229 | name: "concat" 230 | bottom: "odd/ip1" 231 | bottom: "even/ip1" 232 | top: "ip1_fused" 233 | type: "Concat" 234 | concat_param { 235 | axis: 1 236 | } 237 | } 238 | layer { 239 | name: "ip2" 240 | type: "InnerProduct" 241 | bottom: "ip1_fused" 242 | top: "ip2" 243 | param { 244 | lr_mult: 1 245 | } 246 | param { 247 | lr_mult: 2 248 | } 249 | inner_product_param { 250 | num_output: 10 251 | weight_filler { 252 | type: "xavier" 253 | } 254 | bias_filler { 255 | type: "constant" 256 | } 257 | } 258 | } 259 | layer { 260 | name: "accuracy" 261 | type: "Accuracy" 262 | bottom: "ip2" 263 | bottom: "label" 264 | top: "accuracy" 265 | include { 266 | phase: TEST 267 | } 268 | } 269 | layer { 270 | name: "loss" 271 | type: "SoftmaxWithLoss" 272 | bottom: "ip2" 273 | bottom: "label" 274 | top: "loss" 275 | } 276 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_odd_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "lenet_odd_train_val.prototxt" 2 | test_iter: 253 3 | test_initialization: false 4 | test_interval: 1000 5 | 
base_lr: 0.01 6 | momentum: 0.9 7 | weight_decay: 0.0005 8 | lr_policy: "step" 9 | gamma: 0.707 10 | stepsize: 1000 11 | display: 200 12 | max_iter: 30000 13 | snapshot: 30000 14 | snapshot_prefix: "mnist_lenet_odd" 15 | solver_mode: GPU 16 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/lenet_odd_train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "LeNet" 2 | layer { 3 | name: "mnist" 4 | type: "ImageData" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_value: 128 12 | scale: 0.00390625 13 | } 14 | image_data_param { 15 | source: "train_odd.txt" 16 | is_color: false 17 | batch_size: 25 18 | } 19 | } 20 | layer { 21 | name: "mnist" 22 | type: "ImageData" 23 | top: "data" 24 | top: "label" 25 | include { 26 | phase: TEST 27 | } 28 | transform_param { 29 | mean_value: 128 30 | scale: 0.00390625 31 | } 32 | image_data_param { 33 | source: "val_odd.txt" 34 | is_color: false 35 | batch_size: 20 36 | } 37 | } 38 | layer { 39 | name: "conv1" 40 | type: "Convolution" 41 | bottom: "data" 42 | top: "conv1" 43 | param { 44 | lr_mult: 1 45 | } 46 | param { 47 | lr_mult: 2 48 | } 49 | convolution_param { 50 | num_output: 20 51 | kernel_size: 5 52 | stride: 1 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 2 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "conv2" 74 | type: "Convolution" 75 | bottom: "pool1" 76 | top: "conv2" 77 | param { 78 | lr_mult: 1 79 | } 80 | param { 81 | lr_mult: 2 82 | } 83 | convolution_param { 84 | num_output: 50 85 | kernel_size: 5 86 | stride: 1 87 | weight_filler { 88 | type: "xavier" 89 | } 90 | bias_filler { 91 | type: "constant" 92 | } 93 | } 94 | } 95 | layer { 96 | name: "pool2" 97 | type: "Pooling" 98 | bottom: "conv2" 99 | top: "pool2" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 2 103 | stride: 2 104 | } 105 | } 106 | layer { 107 | name: "ip1" 108 | type: "InnerProduct" 109 | bottom: "pool2" 110 | top: "ip1" 111 | param { 112 | lr_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | } 117 | inner_product_param { 118 | num_output: 500 119 | weight_filler { 120 | type: "xavier" 121 | } 122 | bias_filler { 123 | type: "constant" 124 | } 125 | } 126 | } 127 | layer { 128 | name: "relu1" 129 | type: "ReLU" 130 | bottom: "ip1" 131 | top: "ip1" 132 | } 133 | layer { 134 | name: "ip2" 135 | type: "InnerProduct" 136 | bottom: "ip1" 137 | top: "ip2" 138 | param { 139 | lr_mult: 1 140 | } 141 | param { 142 | lr_mult: 2 143 | } 144 | inner_product_param { 145 | num_output: 5 146 | weight_filler { 147 | type: "xavier" 148 | } 149 | bias_filler { 150 | type: "constant" 151 | } 152 | } 153 | } 154 | layer { 155 | name: "accuracy" 156 | type: "Accuracy" 157 | bottom: "ip2" 158 | bottom: "label" 159 | top: "accuracy" 160 | include { 161 | phase: TEST 162 | } 163 | } 164 | layer { 165 | name: "loss" 166 | type: "SoftmaxWithLoss" 167 | bottom: "ip2" 168 | bottom: "label" 169 | top: "loss" 170 | } 171 | -------------------------------------------------------------------------------- /random_bonus/multiple_models_fusion_caffe/rename_n_freeze_layers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | 4 | 
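# regexes matching layer names (e.g. name: "conv1") and lr_mult statements in a Caffe prototxt; every name gets a prefix and every lr_mult is zeroed to freeze the copied layers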
layer_name_regex = re.compile(r'name:\s*"(.*?)"') 5 | lr_mult_regex = re.compile(r'lr_mult:\s*\d+\.*\d*') 6 | 7 | input_filepath = sys.argv[1] 8 | output_filepath = sys.argv[2] 9 | prefix = sys.argv[3] 10 | 11 | with open(input_filepath, 'r') as fr, open(output_filepath, 'w') as fw: 12 | prototxt = fr.read() 13 | layer_names = set(layer_name_regex.findall(prototxt)) 14 | for layer_name in layer_names: 15 | prototxt = prototxt.replace(layer_name, '{}/{}'.format(prefix, layer_name)) 16 | 17 | lr_mult_statements = set(lr_mult_regex.findall(prototxt)) 18 | for lr_mult_statement in lr_mult_statements: 19 | prototxt = prototxt.replace(lr_mult_statement, 'lr_mult: 0') 20 | 21 | fw.write(prototxt) 22 | -------------------------------------------------------------------------------- /reference.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeyun111/dlcv_for_beginners/080c7d3bbb4a68e4fb79e33231ccc666ada16dcc/reference.pdf --------------------------------------------------------------------------------