├── .gitignore ├── housePrice.keras2 ├── sampleImages ├── loss.png ├── model.png └── price.png ├── HouseImages └── 95 │ └── 95_frontal.jpg ├── README.md └── cnn_regression.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /housePrice.keras2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/housePrice.keras2 -------------------------------------------------------------------------------- /sampleImages/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/sampleImages/loss.png -------------------------------------------------------------------------------- /sampleImages/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/sampleImages/model.png -------------------------------------------------------------------------------- /sampleImages/price.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/sampleImages/price.png -------------------------------------------------------------------------------- /HouseImages/95/95_frontal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/HouseImages/95/95_frontal.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # keras-regression-cnns_House_Prices 2 | 3 | 4 | 
Special thanks to [Adrian Rosebrock](https://www.pyimagesearch.com/author/adrian/) for his [great post](https://www.pyimagesearch.com/2019/01/28/keras-regression-and-cnns//) that was used as the baseline for this tutorial. 5 | 6 | This simple code creates and trains a neural network to predict house prices based on 4 images for each house. You can try it by running the command 'python cnn_regression.py'. 7 | 8 | 9 | The model used is as follows: 10 | 11 | 12 | 13 | The dataset is from https://github.com/emanhamed/Houses-dataset. This house dataset includes four numerical and categorical attributes as input and one continuous variable as output: 14 | 1. Number of bedrooms (continuous) 15 | 2. Number of bathrooms (continuous) 16 | 3. Area (continuous) 17 | 4. Zip code (categorical) 18 | 5. Price (continuous) 19 | 20 | Moreover, the dataset includes 4 images for each house, and this is what will be used for training. The 4 images of each house (Bathroom/Kitchen/Frontal/Bedroom) will be tiled together into one 64*64 px image, which will be the input to our CNN; the output is the price. 21 | 22 | 23 | When training finishes, curves will show the training and validation loss. Another curve will also be shown for actual vs. predicted prices. Both curves are saved to the local drive. 
"""Train a small CNN that regresses house prices from tiled house photos.

USAGE
    python cnn_regression.py

The script reads ``HousesInfo.txt``, copies the images found under
``HousesDataset`` into one ``HouseImages/<image prefix>/`` folder per house,
tiles the photos of each house into a single 64x64 image, trains the network
and writes these files to the working directory:

* ``model.png``          -- diagram of the network
* ``housePrice.keras2``  -- the trained model
* ``plot_acc.png``       -- training / validation loss curves
* ``HousePrices.png``    -- actual vs. predicted prices on the test split
"""

# import the necessary packages
import argparse
import itertools
import locale
import os
import shutil
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Flatten
from keras.layers import Input
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import plot_model
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MinMaxScaler

from util import paths

EPOCHS_NUM = 200
# zip codes with fewer houses than this are removed from the data set
MIN_HOUSES_PER_ZIPCODE = 25
# every photo is resized to TILE_SIZE x TILE_SIZE and four of them are
# arranged in a 2 x 2 grid, giving a (2 * TILE_SIZE) square network input
TILE_SIZE = 32
IMAGES_PER_HOUSE = 4


def load_house_attributes(input_path):
    """Read the space-separated, header-less attribute file into a DataFrame."""
    cols = ["bedrooms", "bathrooms", "area", "zipcode", "price"]
    return pd.read_csv(input_path, sep=" ", header=None, names=cols)


def drop_rare_zipcodes(df, min_count=MIN_HOUSES_PER_ZIPCODE):
    """Return a copy of *df* without houses whose zip code is too rare.

    The zip code counts are extremely unbalanced (some zip codes have only
    one or two houses), so every house whose zip code occurs fewer than
    *min_count* times is removed. The original index labels are kept because
    they are used later to locate each house's image folder.
    """
    counts = df["zipcode"].value_counts()
    rare_zipcodes = counts[counts < min_count].index
    return df[~df["zipcode"].isin(rare_zipcodes)].copy()


def sort_images_by_house(source_dir, target_dir):
    """Copy every image under *source_dir* into ``target_dir/<prefix>/``.

    ``<prefix>`` is the part of the file name before the first underscore,
    so all photos of one house end up in the same folder.
    """
    os.makedirs(target_dir, exist_ok=True)
    for source_path in paths.list_images(source_dir):
        file_name = os.path.basename(source_path)
        house_id = file_name.split("_")[0]
        house_dir = os.path.join(target_dir, house_id)
        os.makedirs(house_dir, exist_ok=True)
        shutil.copyfile(source_path, os.path.join(house_dir, file_name))


def load_tiled_image(house_dir):
    """Tile the first four images of *house_dir* into one uint8 image.

    Files are taken in sorted name order and placed top-left, top-right,
    bottom-right, bottom-left.

    Raises:
        ValueError: if a file cannot be decoded as an image, or the folder
            holds fewer than ``IMAGES_PER_HOUSE`` images.
    """
    tiles = []
    for file_name in sorted(os.listdir(house_dir)):
        if file_name == ".DS_Store":
            print(".DS_Store ignored" )
            continue
        file_path = os.path.join(house_dir, file_name)
        img = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising
        if img is None:
            raise ValueError("could not read image {}".format(file_path))
        tiles.append(cv2.resize(img, (TILE_SIZE, TILE_SIZE)))

    if len(tiles) < IMAGES_PER_HOUSE:
        raise ValueError("expected {} images in {}, found {}".format(
            IMAGES_PER_HOUSE, house_dir, len(tiles)))

    side = 2 * TILE_SIZE
    tiled = np.zeros((side, side, 3), dtype="uint8")
    tiled[0:TILE_SIZE, 0:TILE_SIZE] = tiles[0]
    tiled[0:TILE_SIZE, TILE_SIZE:side] = tiles[1]
    tiled[TILE_SIZE:side, TILE_SIZE:side] = tiles[2]
    tiled[TILE_SIZE:side, 0:TILE_SIZE] = tiles[3]
    return tiled


def build_model(input_shape=(2 * TILE_SIZE, 2 * TILE_SIZE, 3)):
    """Build the (uncompiled) CNN with a single linear output unit."""
    chan_dim = -1
    inputs = Input(shape=input_shape)

    # three CONV => RELU => BN => POOL stages with 16, 32 and 64 filters
    x = inputs
    for filters in (16, 32, 64):
        x = Conv2D(filters, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=chan_dim)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    # flatten the volume, then FC => RELU => BN => DROPOUT
    x = Flatten()(x)
    x = Dense(16)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chan_dim)(x)
    x = Dropout(0.5)(x)

    # a small FC layer followed by the linear regression output
    x = Dense(4)(x)
    x = Activation("relu")(x)
    x = Dense(1, activation="linear")(x)

    return Model(inputs, x)


def format_currency(value):
    """Format *value* as currency, even when no currency locale is active."""
    try:
        return locale.currency(value, grouping=True)
    except ValueError:
        # locale.currency refuses to work in the default "C" locale
        return "${:,.2f}".format(value)


def plot_loss(history, file_name="plot_acc.png"):
    """Plot training and validation loss per epoch, save it, then show it."""
    training_loss = history.history['loss']
    validation_loss = history.history['val_loss']
    epochs = range(len(validation_loss))

    # a fresh figure keeps this plot separate from any other plot
    plt.figure()
    plt.plot(epochs, training_loss, label="Training Loss")
    plt.plot(epochs, validation_loss, label="Validation Loss")
    plt.title('Training and validation loss')
    plt.xlabel("Epoch #")
    plt.ylabel("Loss")
    # the legend must exist before saving, otherwise the file lacks it
    plt.legend(loc="upper right")
    plt.savefig(file_name)
    print("[INFO] Loss curve saved to {}".format(file_name))
    plt.show()


def plot_prices(actual, predicted, file_name="HousePrices.png"):
    """Plot actual vs. predicted prices, save the figure, then show it."""
    plt.figure()
    plt.plot(actual, label="Actual price")
    plt.plot(predicted, label="Predicted price")
    plt.title('House prices')
    plt.xlabel("Point #")
    plt.ylabel("Price")
    plt.legend(loc="upper right")
    plt.savefig(file_name)
    plt.show()


def main():
    """Run the whole pipeline: load data, train, evaluate and plot."""
    # load the attribute file that describes each house in the dataset
    print("[INFO] loading house attributes...")
    df = load_house_attributes("HousesInfo.txt")
    print(df.head())

    df = drop_rare_zipcodes(df)
    print("[INFO]removed zipcodes which less than 25 houses")

    # group the images into one folder per house
    sort_images_by_house("HousesDataset", "HouseImages")

    # read the images of every remaining house and tile them into one image
    # NOTE(review): folders are named after the numeric prefix of the image
    # file names, while df.index is the 0-based row number of HousesInfo.txt.
    # If the dataset numbers its images from 1, each row is paired with the
    # previous house's photos -- confirm, and use record_index + 1 if so.
    tiled_images = []
    for record_index in df.index:
        house_dir = os.path.join("HouseImages", str(record_index))
        tiled_images.append(load_tiled_image(house_dir))
        print("[INFO] Reading images from directory {}".format(house_dir))

    # scale the pixel intensities to the range [0, 1]
    images = np.array(tiled_images, dtype="float") / 255.0

    # partition the data into training and testing splits using 75% of
    # the data for training and the remaining 25% for testing
    (train_attr, test_attr, train_x, test_x) = train_test_split(
        df, images, test_size=0.25, random_state=42)

    # find the largest house price in the training set and use it to
    # scale the house prices to the range [0, 1] (helps convergence)
    max_price = train_attr["price"].max()
    print("maxPrice={}".format(max_price))
    train_y = train_attr["price"].values / max_price
    test_y = test_attr["price"].values / max_price

    print("Shapee of training data set {}".format(train_x.shape))
    print("Shapee of price vector {}".format(train_y.shape))

    # create the CNN and compile it with mean absolute percentage error as
    # the loss, i.e. minimize the absolute percentage difference between
    # the price *predictions* and the *actual* prices
    model = build_model()
    model.summary()
    model_plot_file = 'model.png'
    plot_model(model, to_file=model_plot_file)
    print("[INFO] Model plot saved to {}".format(model_plot_file) )

    opt = Adam(lr=1e-3, decay=1e-3 / EPOCHS_NUM)
    model.compile(loss="mean_absolute_percentage_error", optimizer=opt)

    # train the model
    print("[INFO] training model...")
    history = model.fit(train_x, train_y, validation_data=(test_x, test_y),
                        epochs=EPOCHS_NUM, batch_size=8)

    model.save("housePrice.keras2")
    print("[INFO] model saved to housePrice.keras2")

    # make predictions on the testing data
    print("[INFO] predicting house prices...")
    preds = model.predict(test_x)

    # compute the difference between the *predicted* house prices and the
    # *actual* house prices, then the absolute percentage difference
    diff = preds.flatten() - test_y
    percent_diff = (diff / test_y) * 100
    abs_percent_diff = np.abs(percent_diff)

    # mean and standard deviation of the absolute percentage difference
    mean = np.mean(abs_percent_diff)
    std = np.std(abs_percent_diff)

    # finally, show some statistics on the model
    try:
        locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
    except locale.Error:
        # the locale is not installed on this machine; format_currency
        # falls back to manual formatting, so training results are not lost
        pass
    print("[INFO] avg. house price: {}, std house price: {}".format(
        format_currency(df["price"].mean()),
        format_currency(df["price"].std())))
    print("[INFO] mean: {:.2f}%, std: {:.2f}%".format(mean, std))

    # undo the price scaling so the plot shows real prices
    test_y = test_y * max_price
    preds = preds * max_price

    plot_loss(history)

    # plot curves (Actual vs Predicted)
    plot_prices(test_y, preds)
    print("[INFO] predicted vs actual price saved to HousePrices.png")


if __name__ == "__main__":
    main()