├── .gitignore
├── housePrice.keras2
├── sampleImages
    ├── loss.png
    ├── model.png
    └── price.png
├── HouseImages
    └── 95
    │   └── 95_frontal.jpg
├── README.md
└── cnn_regression.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | 


--------------------------------------------------------------------------------
/housePrice.keras2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/housePrice.keras2


--------------------------------------------------------------------------------
/sampleImages/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/sampleImages/loss.png


--------------------------------------------------------------------------------
/sampleImages/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/sampleImages/model.png


--------------------------------------------------------------------------------
/sampleImages/price.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/sampleImages/price.png


--------------------------------------------------------------------------------
/HouseImages/95/95_frontal.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/HouseImages/95/95_frontal.jpg


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # keras-regression-cnns_House_Prices
 2 | 
 3 | 
 4 | Special thanks to [Adrian Rosebrock](https://www.pyimagesearch.com/author/adrian/) for his [great post](https://www.pyimagesearch.com/2019/01/28/keras-regression-and-cnns//), which was used as the baseline for this tutorial.
 5 | 
 6 | This simple code creates and trains a neural network to predict house prices based on 4 images of each house. You can try it by running the command `python cnn_regression.py`.
 7 | 
 8 | 
 9 | The model used is as follows:
10 | <img src="https://github.com/Walid-Ahmed/keras-regression-cnns_House_Prices/blob/master/sampleImages/model.png"  align="middle">
11 | 
12 | 
13 | The dataset is from https://github.com/emanhamed/Houses-dataset. This house dataset includes four numerical and categorical attributes as input and one continuous variable as output:
14 | 1. Number of bedrooms (continuous)
15 | 2. Number of bathrooms (continuous)
16 | 3. Area (continuous)
17 | 4. Zip code (categorical)
18 | 5. Price (continuous)
19 | 
20 | Moreover, the dataset includes 4 images for each house, and these are what is used for training. The 4 images of each house (bathroom/kitchen/frontal/bedroom) are tiled together into one 64x64 px image, which is the input to our CNN; the output is the price.
21 | 
22 | 
23 | When training finishes, a plot shows the training and validation loss curves. Another plot shows the actual vs. predicted prices. Both plots are saved to the local drive. The trained model is also saved as housePrice.keras2.
24 | 
25 | <img src="https://github.com/Walid-Ahmed/keras-regression-cnns_House_Prices/blob/master/sampleImages/loss.png">
26 | 
27 | <img src="https://github.com/Walid-Ahmed/keras-regression-cnns_House_Prices/blob/master/sampleImages/price.png">
28 | 


--------------------------------------------------------------------------------
/cnn_regression.py:
--------------------------------------------------------------------------------
  1 | # USAGE
  2 | # python cnn_regression.py 
  3 | 
  4 | # import the necessary packages
  5 | from keras.optimizers import Adam
  6 | from sklearn.model_selection import train_test_split
  7 | #from pyimagesearch import datasets
  8 | import numpy as np
  9 | import argparse
 10 | import locale
 11 | import os
 12 | from util import paths
 13 | import shutil 
 14 | import cv2
 15 | import numpy as np
 16 | from pathlib import Path
 17 | import itertools
 18 | 
 19 | from keras.models import Sequential
 20 | from keras.layers.normalization import BatchNormalization
 21 | from keras.layers.convolutional import Conv2D
 22 | from keras.layers.convolutional import MaxPooling2D
 23 | from keras.layers.core import Activation
 24 | from keras.layers.core import Dropout
 25 | from keras.layers.core import Dense
 26 | from keras.layers import Flatten
 27 | from keras.layers import Input
 28 | from keras.models import Model
 29 | 
 30 | #from pyimagesearch import datasets
 31 | import pandas as pd
 32 | from sklearn.preprocessing import LabelBinarizer
 33 | from sklearn.preprocessing import MinMaxScaler
 34 | from sklearn import preprocessing
 35 | import matplotlib.pyplot as plt
 36 | from keras.utils import plot_model
 37 | 
 38 | 
 39 | 
 40 | 
 41 | EPOCHS_NUM=200
 42 | # construct the path to the input .txt file that contains information
 43 | # on each house in the dataset and then load the dataset
 44 | 
 45 | print("[INFO] loading house attributes...")
 46 | inputPath =  "HousesInfo.txt"
 47 | cols = ["bedrooms", "bathrooms", "area", "zipcode", "price"]
 48 | df = pd.read_csv(inputPath, sep=" ", header=None, names=cols)
 49 | print(df.head())
 50 | 
 51 | 
 52 | #remove zipcounts that have kess than 25 houses
 53 | #Pandas Index.value_counts() function returns object containing counts of unique values. The resulting object will be in descending order so that the first element is the most frequently-occurring element. Excludes NA values by default.
 54 | zipcodeSeries=df["zipcode"].value_counts()  #<class 'pandas.core.series.Series'>
 55 | zipcodes = zipcodeSeries.keys().tolist()   #zipcodes as list
 56 | counts = zipcodeSeries.tolist()    #count of zipcodes as list  
 57 | for (zipcode, count) in zip(zipcodes, counts):
 58 | 		# the zip code counts for our housing dataset is *extremely*
 59 | 		# unbalanced (some only having 1 or 2 houses per zip code)
 60 | 		# so let's sanitize our data by removing any houses with less
 61 | 		# than 25 houses per zip code
 62 | 		if count < 25:
 63 | 			booleanVal=(df["zipcode"] == zipcode)  # this will be true at all zipcodes that should be deleted
 64 | 			#print(type(booleanVal))   #<class 'pandas.core.series.Series'>
 65 | 			idxs = df[booleanVal].index  #this will return indices of these true values
 66 | 			df.drop(idxs, inplace=True)
 67 | print("[INFO]removed zipcodes which less than 25 houses")            
 68 | 
 69 | 
 70 | 
 71 | 
 72 | 
 73 | 
 74 | # load the house images and then 
 75 | 
 76 | if not os.path.exists('HouseImages'):
 77 |     os.makedirs('HouseImages')
 78 | 
 79 | 
 80 | #dividing images to foler accourding to house (4 imgaes/folder)
 81 | imagesPaths=paths.list_images("HousesDataset")
 82 | for imagePath in imagesPaths:
 83 | 	sourcePath=imagePath
 84 | 	imagePath=os.path.basename(imagePath)
 85 | 	imageIndex=(imagePath.split("_"))[0]
 86 | 	pathToSaveIamge=os.path.join("HouseImages",imageIndex)
 87 | 	if not os.path.exists(pathToSaveIamge):
 88 | 		os.makedirs(pathToSaveIamge)
 89 | 	pathToSaveIamge=os.path.join("HouseImages",imageIndex,imagePath)
 90 | 	shutil.copyfile(sourcePath, pathToSaveIamge)	
 91 | 
 92 | 
 93 | #read image and concotnat each 4 images into one image
 94 | trainingImages=[]
 95 | for recordIndex in df.index:
 96 | 	dirOfImages=os.path.join("HouseImages",str(recordIndex))
 97 | 	houseImages=[]
 98 | 	files=os.listdir(dirOfImages)
 99 | 	for file in sorted(files):
100 | 		if (file==".DS_Store"):
101 |    			print(".DS_Store ignored" )
102 |    			continue
103 | 		imgfilePath=os.path.join(dirOfImages,file)
104 | 		img=cv2.imread(imgfilePath)
105 | 		img = cv2.resize(img, (32, 32))
106 | 		houseImages.append(img)
107 | 	
108 | 	outputImage = np.zeros((64, 64, 3), dtype="uint8")
109 | 	outputImage[0:32, 0:32] = houseImages[0]
110 | 	outputImage[0:32, 32:64] = houseImages[1]
111 | 	outputImage[32:64, 32:64] = houseImages[2]
112 | 	outputImage[32:64, 0:32] = houseImages[3]
113 | 	trainingImages.append(outputImage)
114 | 	print("[INFO] Reading images from directory {}".format(dirOfImages))
115 | 
116 | 
117 | 
118 | 
119 | 
# scale the pixel intensities to the range [0, 1]
images = np.array(trainingImages, dtype="float") / 255.0

# Partition the data into training and testing splits using 75% of the data
# for training and the remaining 25% for testing.
# train_test_split returns the train/test parts of each input in the order
# the inputs were passed: first the two parts of df (the targets), then the
# two parts of images (the network inputs).
(trainY, testY, trainX, testX) = train_test_split(df, images, test_size=0.25, random_state=42)

# Divide all prices by the largest price of the training set so that the
# training targets lie in [0, 1] (leads to better training and convergence).
maxPrice = trainY["price"].max()
print("maxPrice={}".format(maxPrice))
# pauses the script until the user presses Enter
input("press any key")

trainY = trainY["price"].values / maxPrice
testY = testY["price"].values / maxPrice

print("Shapee of training  data set {}".format(trainX.shape))
# report the shape of the price targets; the original printed testX.shape
# (the test images) under this label
print("Shapee of price  vector {}".format(trainY.shape))
148 | 
# Build the regression CNN with the Keras functional API: three convolution
# stages, a small fully-connected head and a single linear output unit that
# predicts the (scaled) house price.
inputShape = (64, 64, 3)
chanDim = -1

# define the model input
inputs = Input(shape=inputShape)

# three CONV => RELU => BN => POOL stages with 16, 32 and 64 filters
x = inputs
for numFilters in (16, 32, 64):
    x = Conv2D(numFilters, (3, 3), padding="same")(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

# flatten the volume, then FC => RELU => BN => DROPOUT
x = Flatten()(x)
x = Dense(16)(x)
x = Activation("relu")(x)
x = BatchNormalization(axis=chanDim)(x)
x = Dropout(0.5)(x)

# a 4-unit FC => RELU layer followed by the linear regression output
x = Dense(4)(x)
x = Activation("relu")(x)
x = Dense(1, activation="linear")(x)

# construct the CNN
model = Model(inputs, x)
190 | 
191 | 
192 | 
193 | 
# print the layer overview and save a diagram of the architecture
model.summary()
fileToSaveModelPlot = 'model.png'
plot_model(model, to_file=fileToSaveModelPlot)
print("[INFO] Model plot saved to {}".format(fileToSaveModelPlot))

# Compile with mean absolute percentage error as the loss, i.e. minimise the
# absolute percentage difference between predicted and actual prices.
# The learning-rate decay is tied to EPOCHS_NUM instead of a second
# hard-coded 200, so both stay in sync when the epoch count is changed.
opt = Adam(lr=1e-3, decay=1e-3 / EPOCHS_NUM)
model.compile(loss="mean_absolute_percentage_error", optimizer=opt)

# train the model, validating on the test split after every epoch
print("[INFO] training model...")
history = model.fit(trainX, trainY, validation_data=(testX, testY),
                    epochs=EPOCHS_NUM, batch_size=8)

# persist the trained model
model.save("housePrice.keras2")
print("[INFO] model saved to housePrice.keras2")
221 | 
# make predictions on the testing data
print("[INFO] predicting house prices...")
preds = model.predict(testX)

# compute the difference between the *predicted* house prices and the
# *actual* house prices, then the percentage difference and the absolute
# percentage difference
diff = preds.flatten() - testY
percentDiff = (diff / testY) * 100
absPercentDiff = np.abs(percentDiff)

# compute the mean and standard deviation of the absolute percentage
# difference
mean = np.mean(absPercentDiff)
std = np.std(absPercentDiff)

# finally, show some statistics on our model
avgPrice = df["price"].mean()
stdPrice = df["price"].std()
try:
    locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
except locale.Error:
    # the en_US.UTF-8 locale is not installed on every system; fall back to
    # plain formatting rather than crashing after the whole training run
    avgPriceText = "${:,.2f}".format(avgPrice)
    stdPriceText = "${:,.2f}".format(stdPrice)
else:
    avgPriceText = locale.currency(avgPrice, grouping=True)
    stdPriceText = locale.currency(stdPrice, grouping=True)
print("[INFO] avg. house price: {}, std house price: {}".format(
    avgPriceText, stdPriceText))
print("[INFO] mean: {:.2f}%, std: {:.2f}%".format(mean, std))

# undo the price scaling so prices are back in their original unit
testY = testY * maxPrice
preds = preds * maxPrice
251 | 
252 | 
# per-epoch loss values recorded by model.fit
validationLoss = history.history['val_loss']
trainingLoss = history.history['loss']

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
epochs = range(len(validationLoss))  # one x position per recorded epoch
plt.plot(epochs, trainingLoss, label="Training Loss")
plt.plot(epochs, validationLoss, label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel("Epoch #")
plt.ylabel("Loss")
# the legend has to be added before savefig, otherwise the saved image
# does not contain it
plt.legend(loc="upper right")
fileToSaveAccuracyCurve = "plot_acc.png"
plt.savefig(fileToSaveAccuracyCurve)
print("[INFO] Loss curve saved to {}".format(fileToSaveAccuracyCurve))
plt.show()
273 | 
274 | 
275 | 
276 | 
277 | 
# plot curves (Actual vs Predicted)
# Start a fresh figure: on backends where plt.show() does not block and
# close the previous window, these curves would otherwise be drawn on top of
# whatever the current figure already contains.
plt.figure()
plt.plot(testY, label="Actual price")
plt.plot(preds, label="Predicted price")
plt.title('House prices')
plt.xlabel("Point #")
plt.ylabel("Price")
plt.legend(loc="upper right")
plt.savefig("HousePrices.png")
plt.show()
print("[INFO] predicted vs actual price saved to HousePrices.png")
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 
294 | 
295 | 
296 | 


--------------------------------------------------------------------------------