"""Convert a folder-per-class image dataset into a single CSV file.

From the project README (convert-dataset-images-to-csv-python): this script
can be used to convert the dataset images into CSV format.

The script reads every file in the folders named ``0`` .. ``NUM_FOLDERS - 1``
(relative to the current working directory), resizes each image to
``IMAGE_WIDTH`` x ``IMAGE_HEIGHT`` and writes one CSV row per image to
``train_converted.csv``: a ``label`` column holding the folder name followed
by one column per pixel, named ``0`` .. ``IMAGE_WIDTH * IMAGE_HEIGHT - 1``.
"""

# dependencies
# OS modules
import os
# Pandas
import pandas as pd
# In-built time module
import time
# tqdm for progress bars
from tqdm import tqdm
# Pillow Image Library
from PIL import Image
# Numpy module
import numpy as np

# Number of dataset folders; they are expected to be named "0", "1", ...
NUM_FOLDERS = 58
# Target image size; replace with your own size (28x28 = 784 pixel columns)
IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28
# Destination CSV file
OUTPUT_CSV = "train_converted.csv"

# Column names of the CSV: the label first, then one column per pixel
columnNames = ['label'] + [str(i) for i in range(IMAGE_WIDTH * IMAGE_HEIGHT)]

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; enlarging a DataFrame one row at a time re-allocates it on every image.
rows = []

# total number of images converted so far
num_images = 0

# iterate through every folder of the dataset
for i in range(0, NUM_FOLDERS):

    # print message
    print("Iterating: " + str(i) + " folder")

    # iterate through every image in the folder (tqdm shows a progress bar);
    # os.listdir returns entries in arbitrary order, so sort them to make the
    # generated CSV reproducible between runs
    for file in tqdm(sorted(os.listdir(str(i)))):
        path = os.path.join(str(i), file)

        # the context manager closes the underlying file handle as soon as
        # the pixel data has been copied out
        with Image.open(path) as img:
            # resize to the target size
            resized = img.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.NEAREST)
            # numpy array of pixel values, indexed as [row][column]
            imgdata = np.asarray(resized, dtype="int32")

        # The CSV has exactly one column per pixel, so only single-channel
        # images fit; fail with a clear message instead of writing whole
        # arrays into the pixel cells.
        if imgdata.shape != (IMAGE_HEIGHT, IMAGE_WIDTH):
            raise ValueError(
                "Expected a single-channel image of shape "
                + str((IMAGE_HEIGHT, IMAGE_WIDTH))
                + " but " + path + " (mode " + str(resized.mode) + ")"
                + " produced an array of shape " + str(imgdata.shape)
            )

        # NOTE(review): pixels are emitted column by column (first index
        # varies fastest), which is the order the original nested loops
        # produced. It is kept for compatibility with CSV files generated
        # earlier; switch to order="C" if row-major output is wanted.
        pixels = imgdata.ravel(order="F").tolist()

        # the label is the folder name, followed by the pixel values
        rows.append([str(i)] + pixels)

        # increment the number of images
        num_images += 1

# build the dataframe in one go and write it to the CSV file
train_data = pd.DataFrame(rows, columns=columnNames)
train_data.to_csv(OUTPUT_CSV, index=False)