├── test └── .gitkeep ├── train └── .gitkeep ├── .DS_Store ├── convert.py └── README.MD /test/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/littlecangbaby/pythonConvertCifar-10/HEAD/.DS_Store -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | #encoding:utf-8 2 | #author:guohuifeng 3 | #email:309884616@qq.com 4 | from scipy.misc import imsave 5 | import numpy as np 6 | 7 | 8 | # 解压缩,返回解压后的字典 9 | def unpickle(file): 10 | import cPickle 11 | fo = open(file, 'rb') 12 | dict = cPickle.load(fo) 13 | fo.close() 14 | return dict 15 | 16 | # 生成训练集图片,如果需要png格式,只需要改图片后缀名即可。 17 | for j in range(1, 6): 18 | dataName = "data_batch_" + str(j) # 读取当前目录下的data_batch12345文件,dataName其实也是data_batch文件的路径,本文和脚本文件在同一目录下。 19 | Xtr = unpickle(dataName) 20 | print dataName + " is loading..." 21 | 22 | for i in range(0, 10000): 23 | img = np.reshape(Xtr['data'][i], (3, 32, 32)) # Xtr['data']为图片二进制数据 24 | img = img.transpose(1, 2, 0) # 读取image 25 | picName = 'train/' + str(Xtr['labels'][i]) + '_' + str(i + (j - 1)*10000) + '.jpg' # Xtr['labels']为图片的标签,值范围0-9,本文中,train文件夹需要存在,并与脚本文件在同一目录下。 26 | imsave(picName, img) 27 | print dataName + " loaded." 28 | 29 | print "test_batch is loading..." 
30 | 31 | # 生成测试集图片 32 | testXtr = unpickle("test_batch") 33 | for i in range(0, 10000): 34 | img = np.reshape(testXtr['data'][i], (3, 32, 32)) 35 | img = img.transpose(1, 2, 0) 36 | picName = 'test/' + str(testXtr['labels'][i]) + '_' + str(i) + '.jpg' 37 | imsave(picName, img) 38 | print "test_batch loaded." 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- 1 | #找遍了网上都没有都没有直接用python将cifar-10转换成图片格式的方法,因此特开一个教程,供大家一起交流学习。本人致力于打造菜鸟教程。 2 | ##0.已经转好的图片格式供大家下载。 3 | [百度盘](https://pan.baidu.com/s/1skN4jW5) 4 | 密码z6i3 5 | 6 | 从上到下依次为图片的0-9类,共10类,其中训练集5W张图片,每类5K张;验证集1W张图片,每类1K张。共计6W张图片。 7 | 8 | ![这里写图片描述](http://img.blog.csdn.net/20170316145353375?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvZ3VvaHVpZmVuZ2J5/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/SouthEast) 9 | 10 | ##1.准备工具 11 | * cifar10数据集,需要自行下载,并解压到与转换脚本同一文件夹内。keras官方下载路径为[keras的cifar-10官方下载路径](http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz) 12 | * python依赖库:numpy、等等。(如果运行过程中报错,请自行根据错误提示装上依赖的库) 13 | 14 | ##2.代码 15 | ``` 16 | #encoding:utf-8 17 | #author:guohuifeng 18 | #email:309884616@qq.com 19 | from scipy.misc import imsave 20 | import numpy as np 21 | 22 | # 解压缩,返回解压后的字典 23 | def unpickle(file): 24 | import cPickle 25 | fo = open(file, 'rb') 26 | dict = cPickle.load(fo) 27 | fo.close() 28 | return dict 29 | 30 | # 生成训练集图片,如果需要png格式,只需要改图片后缀名即可。 31 | for j in range(1, 6): 32 | dataName = "data_batch_" + str(j) # 读取当前目录下的data_batch12345文件,dataName其实也是data_batch文件的路径,本文和脚本文件在同一目录下。 33 | Xtr = unpickle(dataName) 34 | print dataName + " is loading..." 
35 | 36 | for i in range(0, 10000): 37 | img = np.reshape(Xtr['data'][i], (3, 32, 32)) # Xtr['data']为图片二进制数据 38 | img = img.transpose(1, 2, 0) # 读取image 39 | picName = 'train/' + str(Xtr['labels'][i]) + '_' + str(i + (j - 1)*10000) + '.jpg' # Xtr['labels']为图片的标签,值范围0-9,本文中,train文件夹需要存在,并与脚本文件在同一目录下。 40 | imsave(picName, img) 41 | print dataName + " loaded." 42 | 43 | print "test_batch is loading..." 44 | 45 | # 生成测试集图片 46 | testXtr = unpickle("test_batch") 47 | for i in range(0, 10000): 48 | img = np.reshape(testXtr['data'][i], (3, 32, 32)) 49 | img = img.transpose(1, 2, 0) 50 | picName = 'test/' + str(testXtr['labels'][i]) + '_' + str(i) + '.jpg' 51 | imsave(picName, img) 52 | print "test_batch loaded." 53 | ``` 54 | 55 | ##3.检验 56 | 根据本文的图片命名方式,采用"标签_图片序号"的方式(训练集的图片序号为batch内序号加上(batch编号-1)*10000,即在全部训练集中的全局序号),可以解决图片名重复带来的问题。下面根据前文中第0章的大类展示来做下测试: 57 | 58 | * 第0类,飞机,正确 59 | 60 | ![这里写图片描述](http://img.blog.csdn.net/20170316151053368?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvZ3VvaHVpZmVuZ2J5/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/SouthEast) 61 | 62 | * 第6类,青蛙,正确 63 | 64 | ![这里写图片描述](http://img.blog.csdn.net/20170316151547854?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvZ3VvaHVpZmVuZ2J5/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/SouthEast) 65 | 66 | ##4.对未来工作的展望 67 | cifar-100的图片转换类似,有兴趣的话请自行转换。 68 | 69 | --------------------------------------------------------------------------------