├── .gitignore
├── housePrice.keras2
├── sampleImages
├── loss.png
├── model.png
└── price.png
├── HouseImages
└── 95
│ └── 95_frontal.jpg
├── README.md
└── cnn_regression.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
--------------------------------------------------------------------------------
/housePrice.keras2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/housePrice.keras2
--------------------------------------------------------------------------------
/sampleImages/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/sampleImages/loss.png
--------------------------------------------------------------------------------
/sampleImages/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/sampleImages/model.png
--------------------------------------------------------------------------------
/sampleImages/price.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/sampleImages/price.png
--------------------------------------------------------------------------------
/HouseImages/95/95_frontal.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Walid-Ahmed/keras-regression-cnns_House_Prices/HEAD/HouseImages/95/95_frontal.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # keras-regression-cnns_House_Prices
2 |
3 |
4 | Special thanks to [Adrian Rosebrock](https://www.pyimagesearch.com/author/adrian/) for his [great post](https://www.pyimagesearch.com/2019/01/28/keras-regression-and-cnns/), which was used as the baseline for this tutorial.
5 |
6 | This simple code creates and trains a neural network to predict house prices based on 4 images of each house. You can try it by running the command `python cnn_regression.py`.
7 |
8 |
9 | The model used is as follows:
10 |
11 |
12 |
13 | The dataset is from https://github.com/emanhamed/Houses-dataset. It includes four numerical and categorical attributes as input and one continuous variable as output:
14 | 1. Number of bedrooms (continuous)
15 | 2. Number of bathrooms (continuous)
16 | 3. Area (continuous)
17 | 4. Zip code (categorical)
18 | 5. Price (continuous)
19 |
20 | Moreover, the dataset includes 4 images for each house, and these are what will be used for training. The 4 images of each house (bathroom/kitchen/frontal/bedroom) will be tiled together into one 64*64 px image, which will be the input to our CNN; the output is the price.
21 |
22 |
23 | When training finishes, a curve will show the training and validation loss. Another curve will also be shown for actual vs. predicted prices. Both curves are saved to the local drive. The trained model is also saved as housePrice.keras2.
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/cnn_regression.py:
--------------------------------------------------------------------------------
1 | # USAGE
2 | # python cnn_regression.py
3 |
4 | # import the necessary packages
5 | from keras.optimizers import Adam
6 | from sklearn.model_selection import train_test_split
7 | #from pyimagesearch import datasets
8 | import numpy as np
9 | import argparse
10 | import locale
11 | import os
12 | from util import paths
13 | import shutil
14 | import cv2
15 | import numpy as np
16 | from pathlib import Path
17 | import itertools
18 |
19 | from keras.models import Sequential
20 | from keras.layers.normalization import BatchNormalization
21 | from keras.layers.convolutional import Conv2D
22 | from keras.layers.convolutional import MaxPooling2D
23 | from keras.layers.core import Activation
24 | from keras.layers.core import Dropout
25 | from keras.layers.core import Dense
26 | from keras.layers import Flatten
27 | from keras.layers import Input
28 | from keras.models import Model
29 |
30 | #from pyimagesearch import datasets
31 | import pandas as pd
32 | from sklearn.preprocessing import LabelBinarizer
33 | from sklearn.preprocessing import MinMaxScaler
34 | from sklearn import preprocessing
35 | import matplotlib.pyplot as plt
36 | from keras.utils import plot_model
37 |
38 |
39 |
40 |
# Number of epochs the network is trained for further below.
EPOCHS_NUM = 200

# Load the per-house attributes. The file is space-separated and has no
# header row, so the column names are supplied explicitly.
print("[INFO] loading house attributes...")
inputPath = "HousesInfo.txt"
cols = ["bedrooms", "bathrooms", "area", "zipcode", "price"]
df = pd.read_csv(
    inputPath,
    names=cols,
    header=None,
    sep=" ",
)
print(df.head())
50 |
51 |
# Remove houses that sit in sparsely represented zip codes.
# The zip code counts of this dataset are extremely unbalanced (some zip
# codes have only 1 or 2 houses), so every house whose zip code occurs fewer
# than 25 times is dropped from df in place.
# value_counts() excludes NA values by default, so rows with a missing zip
# code are never selected for removal.
zipcodeSeries = df["zipcode"].value_counts()
rareZipcodes = zipcodeSeries[zipcodeSeries < 25].index
rowsToRemove = df[df["zipcode"].isin(rareZipcodes)].index
df.drop(rowsToRemove, inplace=True)
print("[INFO]removed zipcodes which less than 25 houses")
68 |
69 |
70 |
71 |
72 |
73 |
# Regroup the raw dataset images into one sub-folder per house:
# HousesDataset/<prefix>_<rest> is copied to HouseImages/<prefix>/<prefix>_<rest>,
# where <prefix> is the part of the file name before the first underscore.
outputRoot = 'HouseImages'
if not os.path.exists(outputRoot):
    os.makedirs(outputRoot)

for sourcePath in paths.list_images("HousesDataset"):
    fileName = os.path.basename(sourcePath)
    housePrefix = fileName.partition("_")[0]

    # create the per-house folder the first time one of its images is seen
    houseDir = os.path.join(outputRoot, housePrefix)
    if not os.path.exists(houseDir):
        os.makedirs(houseDir)

    shutil.copyfile(sourcePath, os.path.join(houseDir, fileName))
91 |
92 |
# Read the images of every remaining house and tile them into a single
# 64x64 montage (four 32x32 tiles), one montage per row of df.
trainingImages = []
for recordIndex in df.index:
    # HousesInfo.txt is read without a header, so df.index is the 0-based
    # row number. The image folders are named after the file-name prefix.
    # NOTE(review): assumes the dataset numbers its houses from 1, so that
    # row i describes house i + 1 -- confirm against HousesDataset. Using
    # the raw index pairs each price with the previous house's images.
    houseNumber = recordIndex + 1
    dirOfImages = os.path.join("HouseImages", str(houseNumber))

    houseImages = []
    # sorted() keeps the tile order deterministic across platforms
    for fileName in sorted(os.listdir(dirOfImages)):
        if fileName == ".DS_Store":
            print(".DS_Store ignored")
            continue
        imgfilePath = os.path.join(dirOfImages, fileName)
        img = cv2.imread(imgfilePath)
        # cv2.imread returns None instead of raising on an unreadable file
        if img is None:
            raise IOError("could not read image {}".format(imgfilePath))
        houseImages.append(cv2.resize(img, (32, 32)))

    if len(houseImages) < 4:
        raise ValueError("expected 4 images in {}, found {}".format(
            dirOfImages, len(houseImages)))

    # tile layout: [0] top-left, [1] top-right, [2] bottom-right,
    # [3] bottom-left
    outputImage = np.zeros((64, 64, 3), dtype="uint8")
    outputImage[0:32, 0:32] = houseImages[0]
    outputImage[0:32, 32:64] = houseImages[1]
    outputImage[32:64, 32:64] = houseImages[2]
    outputImage[32:64, 0:32] = houseImages[3]
    trainingImages.append(outputImage)
    print("[INFO] Reading images from directory {}".format(dirOfImages))

# scale the pixel intensities to the range [0, 1]
images = np.array(trainingImages, dtype="float") / 255.0
122 |
123 |
124 |
125 |
# Partition the data into training and testing splits using 75% of the data
# for training and the remaining 25% for testing. train_test_split returns
# the two halves of its first argument (the attribute rows) followed by the
# two halves of its second argument (the images).
(trainAttr, testAttr, trainX, testX) = train_test_split(
    df, images, test_size=0.25, random_state=42)

# Find the largest house price in the training set and use it to scale the
# prices (will lead to better training and convergence). Only the training
# maximum is used, so scaled test prices may exceed 1.
maxPrice = trainAttr["price"].max()
print("maxPrice={}".format(maxPrice))
# manual pause: execution continues once Enter is pressed
input("press any key")

trainY = trainAttr["price"].values / maxPrice
testY = testAttr["price"].values / maxPrice

print("Shapee of training data set {}".format(trainX.shape))
# report the training targets here; the original printed the shape of the
# test images under this label
print("Shapee of price vector {}".format(trainY.shape))
148 |
# Create the Convolutional Neural Network that regresses a price from one
# 64x64x3 montage. The loss is chosen when the model is compiled further
# below.
inputShape = (64, 64, 3)
chanDim = -1  # channels-last: batch-normalize over the last axis

# define the model input
inputs = Input(shape=inputShape)

# three CONV => RELU => BN => POOL stages with 16, 32 and 64 filters
x = inputs
for numFilters in (16, 32, 64):
    x = Conv2D(numFilters, (3, 3), padding="same")(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

# flatten the volume, then FC => RELU => BN => DROPOUT
x = Flatten()(x)
x = Dense(16)(x)
x = Activation("relu")(x)
x = BatchNormalization(axis=chanDim)(x)
x = Dropout(0.5)(x)

# a small 4-unit FC layer followed by a single linear unit that outputs the
# regressed (scaled) price
x = Dense(4)(x)
x = Activation("relu")(x)
x = Dense(1, activation="linear")(x)

# construct the CNN, print its layer summary and save a diagram of it
model = Model(inputs, x)
model.summary()
fileToSaveModelPlot = 'model.png'
plot_model(model, to_file=fileToSaveModelPlot)
print("[INFO] Model plot saved to {}".format(fileToSaveModelPlot))
199 |
200 |
201 |
202 |
203 |
204 |
205 |
# Compile the model using mean absolute percentage error as the loss,
# implying that we seek to minimize the absolute percentage difference
# between the price *predictions* and the *actual* prices.
# The learning-rate decay is derived from EPOCHS_NUM so that it stays in
# sync with the number of epochs actually trained (it was hard-coded to 200).
opt = Adam(lr=1e-3, decay=1e-3 / EPOCHS_NUM)
model.compile(loss="mean_absolute_percentage_error", optimizer=opt)

# train the model, validating on the held-out split after every epoch
print("[INFO] training model...")
history = model.fit(
    trainX, trainY,
    validation_data=(testX, testY),
    epochs=EPOCHS_NUM,
    batch_size=8)

# persist the trained model; one variable feeds both the save call and the
# log line so the two cannot disagree
modelPath = "housePrice.keras2"
model.save(modelPath)
print("[INFO] model saved to {}".format(modelPath))
221 |
# make predictions on the testing data
print("[INFO] predicting house prices...")
preds = model.predict(testX)

# compute the difference between the *predicted* house prices and the
# *actual* house prices, then the percentage difference and the absolute
# percentage difference (all on the scaled prices)
diff = preds.flatten() - testY
percentDiff = (diff / testY) * 100
absPercentDiff = np.abs(percentDiff)

# mean and standard deviation of the absolute percentage difference
mean = np.mean(absPercentDiff)
std = np.std(absPercentDiff)

# Format the price statistics as US currency. The en_US.UTF-8 locale is not
# installed on every system; setlocale then raises locale.Error, which must
# not abort the run after training has finished, so fall back to plain
# string formatting that produces the same "$1,234.56" layout.
try:
    locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
except locale.Error:
    avgPriceText = "${:,.2f}".format(df["price"].mean())
    stdPriceText = "${:,.2f}".format(df["price"].std())
else:
    avgPriceText = locale.currency(df["price"].mean(), grouping=True)
    stdPriceText = locale.currency(df["price"].std(), grouping=True)

# finally, show some statistics on our model
print("[INFO] avg. house price: {}, std house price: {}".format(
    avgPriceText, stdPriceText))
print("[INFO] mean: {:.2f}%, std: {:.2f}%".format(mean, std))

# undo the max-price scaling so later code works with real prices
testY = testY * maxPrice
preds = preds * maxPrice
251 |
252 |
# per-epoch losses recorded during training
validationLoss = history.history['val_loss']
trainingLoss = history.history['loss']

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
epochs = range(len(validationLoss))  # one x-position per recorded epoch
plt.figure()
plt.plot(epochs, trainingLoss, label="Training Loss")
plt.plot(epochs, validationLoss, label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel("Epoch #")
plt.ylabel("Loss")
# the legend must be added before saving, otherwise the saved image lacks it
plt.legend(loc="upper right")
fileToSaveAccuracyCurve = "plot_acc.png"
plt.savefig(fileToSaveAccuracyCurve)
print("[INFO] Loss curve saved to {}".format(fileToSaveAccuracyCurve))
plt.show()

# plot curves (Actual vs Predicted); a fresh figure keeps the loss curves
# out of this plot on backends where plt.show() does not close the figure
plt.figure()
plt.plot(testY, label="Actual price")
plt.plot(preds, label="Predicted price")
plt.title('House prices')
plt.xlabel("Point #")
plt.ylabel("Price")
plt.legend(loc="upper right")
plt.savefig("HousePrices.png")
plt.show()
print("[INFO] predicted vs actual price saved to HousePrices.png")
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
--------------------------------------------------------------------------------