├── .gitignore ├── README.md ├── data_collection ├── osm.py ├── roof.py └── sign_url.py ├── data_preprocessing ├── add_angle70.py ├── contour_cache.py ├── contour_nopanel_extraction.py ├── contour_panel_extraction.py ├── csvupdate.py ├── data_augment.py ├── data_integration.py ├── feature_extraction.py ├── mean_cache.py ├── mean_cache_nosplit.py ├── mean_cache_split.py ├── read_calculatemetric.py └── under_smaple.py ├── evaluation ├── generate_evaluation_confusionmatrics.py ├── iou_calculation.py ├── iou_score_generator.py ├── orientation_calculate.py ├── postive_contour_generate.py ├── roof_contour_match.py └── solar_array_orientation.py ├── image └── pipeline.png ├── models ├── PCA │ ├── components.py │ └── pca.py ├── hybrid │ ├── hybrid.py │ ├── linear_model │ │ └── linearmodel.py │ └── union_differentweights.py ├── logical_regression │ ├── logical_model_data.py │ ├── logical_model_test.py │ ├── logical_model_train.py │ ├── lrmodel.py │ ├── vgg.py │ └── vgg_physical_integration.py ├── random_forest │ └── random_forest.py ├── svm │ ├── svm10.py │ ├── svm2.py │ ├── svm3.py │ ├── svm4.py │ ├── svm5.py │ ├── svm6.py │ ├── svm7.py │ ├── svm8.py │ ├── svm9.py │ ├── svm_roc.py │ ├── svmaggressive.py │ ├── svmlinear.py │ ├── svmnosplit.py │ ├── svmrbf.py │ ├── svmridge.py │ └── svmsplit.py ├── thresholding │ ├── append_new_column.py │ ├── data_description.py │ ├── hard_filters_test.py │ └── thresholding_model_generator.py └── vgg_process │ ├── data_preparation.py │ ├── metrics.py │ ├── train_validation.py │ ├── vgg_images_test.py │ └── vgg_images_train.py ├── requirements.txt ├── result_presentation ├── 10location_accuray.py ├── contours_extraction.py ├── csv_calculate.py ├── data_stastics │ ├── box_plot.py │ ├── distribution_plot.py │ ├── scatter_grid.py │ ├── scatter_plot.py │ └── violin_plot.py ├── draw_pca.py ├── draw_roc.py ├── feature_statistics.py └── kmeans_draw.py └── tools └── solar_labeller ├── input_data_preparation.py ├── requirements.txt ├── solar_marker.py └── solar_marker_fixer.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | */.DS_Store 3 | Thumbs.db 4 | node_modules/* 5 | *.idea 6 | *~ 7 | package-lock.json 8 | .vscode 9 | .idea/ 10 | .idea/* 11 | .venv 12 | venv 13 | house*/* 14 | log/ 15 | data/ 16 | *.ipynb 17 | */*.ipynb 18 | .ipynb_checkpoints 19 | */.ipynb_checkpoints 20 | output*/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SolarFinder: Automatic Detection of Solar Photovoltaic Arrays. 2 | 3 | Smart cities, utilities, third-parties, and government agencies are having pressure on managing stochastic power generation from distributed rooftop solar photovoltaic arrays, such as accurately predicting and reacting to the variations in electric grid. Recently, there is a rising interest to identify and characterize solar PV arrays automatically and passively. Traditional approaches such as online assessment and utilities interconnection filings are time consuming and costly, and limited in geospatial resolution and thus do not scale up to every location. Significant recent work focuses on using aerial imagery to train machine learning or deep learning models to automatically detect solar PV arrays. 
Unfortunately, these approaches typically require Very High Resolution (VHR) images and human handcrafted image templates for training, which have a minimum cost of \$15 per $km^2$ and are not always available at every location. 4 | 5 | To address the problem, we design a new system---"SolarFinder" that can automatically detect distributed solar photovoltaic arrays in a given geospatial region without any extra cost. SolarFinder first automatically fetches low/regular resolution satellite images within the region using publicly-available imagery APIs. Then, SolarFinder leverages multi-dimensional K-means algorithm to automatically segment solar arrays on rooftop images. Eventually, SolarFinder employs hybrid linear regression approach that integrates support vectors machine (SVMs-RBF) modeling with a deep convolutional neural network (CNNs) approach to accurately identify solar arrays and characterize each solar deployment simultaneously. We evaluate SolarFinder using 269,632 public satellite images that include 1,143,636 contours from 13 geospatial regions in U.S. We find that pre-trained SolarFinder yields a MCC of 0.17, which is 3 times better than the most recent pre-trained CNNs approach and is the same as a re-trained CNNs approach. 6 | 7 |

8 | 9 |

10 | 11 | #### [Project Website](https://cps.cs.fiu.edu/projects/solarfinder-project/) 12 | 13 | ## Datasets 14 | 15 | To download the rooftop images dataset: [Download](https://www.kaggle.com/datasets/qli027/solar-finder). We are currently having issues hosting the linked website that shares the dataset. A new link will be announced soon. 16 | 17 | ## Prerequisite Environment 18 | 19 | ### Environment Requirements 20 | 21 | Python 3.7 or a higher version is required. 22 | 23 | ### Setup Environment 24 | 1. Install virtualenv 25 | ``` 26 | pip install virtualenv 27 | ``` 28 | 29 | 2. Create the venv directory 30 | ``` 31 | python3 -m venv .venv 32 | ``` 33 | 34 | 3. Activate the virtual environment 35 | ``` 36 | source .venv/bin/activate 37 | ``` 38 | 39 | 4. Install the packages from requirements.txt 40 | ``` 41 | pip install -r requirements.txt 42 | ``` 43 | 44 | 5. Deactivate the virtual environment 45 | ``` 46 | deactivate 47 | ``` 48 | 49 | The SolarFinder work was published at the 19th ACM/IEEE Conference on Information Processing in Sensor Networks (IPSN 2020). 50 | If you use our code or datasets in your research, please consider citing our work: 51 | 52 | #### SolarFinder: Automatic Detection of Solar Photovoltaic Arrays.
53 | Qi Li, Yuzhou Feng, Yuyang Leng and Dong Chen.
54 | In Proc. of the 19th ACM/IEEE International Conference on Information Processing in Sensor Networks, IPSN'20, April 21-24, 2020, Sydney, Australia. 55 | 56 | -------------------------------------------------------------------------------- /data_collection/osm.py: -------------------------------------------------------------------------------- 1 | # this file is used to generate the url to download images from google static map, 2 | # for every house, we store the url to download the house original images and house mask. 3 | # the input is osm file, output is json which store the ulr and csv document which store the id and location of houses. 4 | import csv 5 | import datetime 6 | import glob as gb 7 | import json 8 | import os 9 | import xml.dom.minidom 10 | import xml.dom.minidom 11 | 12 | # used to calculate the download images time 13 | start = datetime.datetime.now() 14 | # osm_path which is the osm file location 15 | osm_path = gb.glob("/*.osm") 16 | for osm in osm_path: 17 | dom = xml.dom.minidom.parse(osm) 18 | num = osm.split("/")[-1] 19 | num = os.path.splitext(num)[0] 20 | # dom = xml.dom.minidom.parse('./0.osm') 21 | root = dom.documentElement 22 | nodelist = root.getElementsByTagName('node') 23 | waylist = root.getElementsByTagName('way') 24 | node_dic = {} 25 | 26 | url_prefix1 = 'https://maps.googleapis.com/maps/api/staticmap?zoom=20&size=400x400&scale=4&maptype=hybrid&path=color:0xff0000ff%7Cweight:5%7Cfillcolor:0xff0000ff' 27 | url_prefix2 = 'https://maps.googleapis.com/maps/api/staticmap?zoom=20&size=400x400&scale=4&maptype=hybrid&path=color:0x00000000%7Cweight:5%7Cfillcolor:0x00000000' 28 | url_suffix = '&key=AIzaSyA7UVGBz0YP8OPQnQ9Suz69_u1TUSDukt8' 29 | 30 | for node in nodelist: 31 | node_id = node.getAttribute('id') 32 | node_lat = float(node.getAttribute('lat')) 33 | node_lon = float(node.getAttribute('lon')) 34 | node_dic[node_id] = (node_lat, node_lon) 35 | url = [] 36 | location = {} 37 | csv_lat = 0 38 | csv_lon = 0 39 | num_img = 0 40 | # json used to store the url of images downloading 41 | with open(os.path.join('./10house/house1/', format(str(num)) + '.json'), 'w') as json_file: 42 | for way in waylist: 43 | taglist = way.getElementsByTagName('tag') 44 | build_flag = False 45 | for tag in taglist: 46 | # choose the attribute to be building, 47 | if tag.getAttribute('k') == 'building': 48 | build_flag = True 49 | if build_flag: 50 | ndlist = way.getElementsByTagName('nd') 51 | s = "" 52 | for nd in ndlist: 53 | nd_id = nd.getAttribute('ref') 54 | if nd_id in node_dic: 55 | node_lat = node_dic[nd_id][0] 56 | node_lon = node_dic[nd_id][1] 57 | g = nd_id 58 | csv_lat = node_dic[nd_id][0] 59 | csv_lon = node_dic[nd_id][1] 60 | print(g) 61 | s += '%7C' + str(node_lat) + '%2C' + str(node_lon) 62 | # secret = 'pSRLFZI7ujDivoNjR-Vz7GR6F4Q=' 63 | url1 = url_prefix1 + s + url_suffix 64 | # url1 = sign_url(url1, secret) 65 | url2 = url_prefix2 + s + url_suffix 66 | # url2 = sign_url(url2, secret) 67 | test_dict = {"id": g, "mask": url1, "image": url2} 68 | url.append(test_dict) 69 | location[g] = str(csv_lat) + ',' + str(csv_lon) 70 | num_img = num_img + 1 71 | json_str = json.dumps(url) 72 | json_file.write(json_str) 73 | json_file.close() 74 | # csv document used to store the house id and location( latitude and longtitude) 75 | csv_path = "./10house/house1/house1.csv" 76 | with open(csv_path, 'a') as csv_file: 77 | writer = csv.writer(csv_file) 78 | for key, value in location.items(): 79 | writer.writerow([key, value]) 80 | csv_file.close() 81 | end = 
datetime.datetime.now() 82 | print(end - start) 83 | -------------------------------------------------------------------------------- /data_collection/roof.py: -------------------------------------------------------------------------------- 1 | # this is used to download the roof images from google static map , 2 | # we download the original images and mask from google static map (free) then use and operation to get the roof ROI, 3 | # so we can process and label the roof images 4 | import glob as gb 5 | import json 6 | import os 7 | 8 | import cv2 9 | import numpy as np 10 | import requests 11 | 12 | i = 0 13 | json_path = gb.glob("./10house/house1/map.json") 14 | for file in json_path: 15 | with open(file, 'r') as file: 16 | urls = json.load(file) 17 | for url in urls: 18 | i = i + 1 19 | id = url['id'] 20 | mask = url['mask'] 21 | image = url['image'] 22 | mask = requests.get(mask) 23 | image = requests.get(image) 24 | fmask = open(os.path.join('./10house/house1/image/', format(str('1')) + '.png'), 'ab') 25 | fimg = open(os.path.join('./10house/house1/mask/', format(str('1')) + '.png'), 'ab') 26 | fmask.write(mask.content) 27 | fimg.write(image.content) 28 | fmask.close() 29 | fimg.close() 30 | tag = cv2.imread(os.path.join('./10house/house1/image/', format('1') + '.png')) 31 | real = cv2.imread(os.path.join('./10house/house1/mask/', format('1') + '.png')) 32 | lower = np.array([0, 0, 100]) 33 | upper = np.array([40, 40, 255]) 34 | img = cv2.inRange(tag, lower, upper) 35 | 36 | # and operations with images 37 | img = np.expand_dims(img, axis=2) 38 | img = np.concatenate((img, img, img), axis=-1) 39 | result = cv2.bitwise_and(real, img) 40 | cv2.imwrite(os.path.join('./10house/house1/roof/' + format(str(id)) + '.png'), result) 41 | os.remove("./10house/house1/image/1.png") 42 | os.remove("./10house/house1/mask/1.png") 43 | -------------------------------------------------------------------------------- /data_collection/sign_url.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import hashlib 3 | import hmac 4 | from urllib.parse import urlparse 5 | 6 | 7 | # sign the ulr so there is no limit to download the images from google static map, 8 | # but it may cause extra fees. 9 | def sign_url(input_url=None, secret=None): 10 | if not input_url or not secret: 11 | raise Exception("Both input_url and secret are required") 12 | 13 | url = urlparse(input_url) 14 | 15 | # We only need to sign the path+query part of the string 16 | url_to_sign = url.path + "?" + url.query 17 | 18 | # Decode the private key into its binary format 19 | # We need to decode the URL-encoded private key 20 | decoded_key = base64.urlsafe_b64decode(secret) 21 | 22 | # Create a signature using the private key and the URL-encoded 23 | # string using HMAC SHA1. This signature will be binary. 24 | signature = hmac.new(decoded_key, url_to_sign.encode(), hashlib.sha1) 25 | 26 | # Encode the binary signature into base64 for use within a URL 27 | encoded_signature = base64.urlsafe_b64encode(signature.digest()) 28 | 29 | original_url = url.scheme + "://" + url.netloc + url.path + "?" 
+ url.query 30 | 31 | # Return signed URL 32 | return original_url + "&signature=" + encoded_signature.decode() 33 | -------------------------------------------------------------------------------- /data_preprocessing/add_angle70.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.preprocessing import StandardScaler 5 | import csv 6 | 7 | 8 | data = pd.read_csv(".csv") 9 | df = pd.DataFrame(data) 10 | data1 =pd.read_csv("/contour_all.csv") 11 | df1 = pd.DataFrame(data1) 12 | 13 | angle70 = df1.iloc[:,13] 14 | df.insert(13, "numangle70", angle70, True) 15 | 16 | export_csv = df.to_csv ('/location810/angle70.csv',index=None) 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /data_preprocessing/contour_nopanel_extraction.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import cv2 3 | csvpath = '/aul/homes/final_contour/house3' + '/nopanelcontour_features.csv' 4 | with open(csvpath, 'a') as updatecsv: 5 | myFields = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] 6 | writer = csv.DictWriter(updatecsv, fieldnames=myFields) 7 | writer.writeheader() 8 | updatecsv.close() 9 | csv_path = '/aul/homes/data/house3/contour_features.csv' 10 | with open(csv_path, newline='') as csvfile: 11 | reader = csv.DictReader(csvfile) 12 | for row in reader: 13 | if (row['label']==str(0)): 14 | contour = row 15 | img_path = '/aul/homes/data/house3/' + 'contour_all/' + row['id'] + '.png' 16 | img = cv2.imread(img_path) 17 | img_newpath = '/aul/homes/final_contour/house3/nopanel/' + row['id'] + '.png' 18 | cv2.imwrite(img_newpath ,img) 19 | print(contour['id']) 20 | with open(csvpath, 'a') as updatecsv: 21 | writer = csv.writer(updatecsv) 22 | writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) 23 | updatecsv.close() 24 | csvfile.close() 25 | print('finish') -------------------------------------------------------------------------------- /data_preprocessing/contour_panel_extraction.py: -------------------------------------------------------------------------------- 1 | # copy the contours listed in the train CSV into the split training folder 2 | import csv 3 | import cv2 4 | 5 | csv_path = '/aul/homes/final/nosplit/train/train_nopanel.csv' 6 | with open(csv_path, newline='') as csvfile: 7 | reader = csv.DictReader(csvfile) 8 | for row in reader: 9 | img_name = row['id'] 10 | location = row['location'][-1] 11 | # a trailing '0' stands for location10 (assumed, matching the convention in mean_cache_split.py) 12 | if (location == '0'): 13 | location = '10' 14 | img_path = '/aul/homes/final_contour/house' + str(location) + '/panel/' + img_name + '.png' 15 | img = cv2.imread(img_path) 16 | img_newpath = '/aul/homes/final/split/train/nopanel/' + img_name + '.png' 17 | cv2.imwrite(img_newpath, img) 18 | csvfile.close() -------------------------------------------------------------------------------- /data_preprocessing/csvupdate.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os.path 3 | from os import path 4 | 5 | csv_path = 
'/aul/homes/dataset/dataset930/house' + str(1) + '/house' + str(1) + '.csv' 6 | csvpath = '/aul/homes//dataset/dataset930/house' + str(1) + '/location' + str(1) + '.csv' 7 | 8 | with open(csvpath, 'a') as csvupdate: 9 | myFields = ['id', 'location','label'] 10 | writer = csv.DictWriter(csvupdate, fieldnames=myFields) 11 | writer.writeheader() 12 | csvupdate.close() 13 | with open(csv_path, newline='') as csvfile: 14 | reader = csv.DictReader(csvfile) 15 | for row in reader: 16 | img = {} 17 | img['id'] = row['id'] 18 | img['location'] = row['location'] 19 | img['lable'] = row['label'] 20 | if (path.exists('/aul/homes/dataset/dataset930/house' + str(1) + '/roof/' + img['id'] + '.png') == True): 21 | with open(csvpath, 'a') as csvupdate: 22 | writer = csv.writer(csvupdate) 23 | writer.writerow([img['id'],img['location'], img['lable']]) 24 | csvupdate.close() 25 | csvfile.close() 26 | print('finish') 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /data_preprocessing/data_augment.py: -------------------------------------------------------------------------------- 1 | import Augmentor 2 | p = Augmentor.Pipeline("/aul/homes/final/split/location17/panel/") 3 | # Point to a directory containing ground truth data. 4 | # Images with the same file names will be added as ground truth data 5 | # and augmented in parallel to the original data. 6 | 7 | # Point to a directory containing ground truth data. 8 | # Images with the same file names will be added as ground truth data 9 | # and augmented in parallel to the original data. 10 | 11 | # Add operations to the pipeline as normal: 12 | p.rotate90(probability=1) 13 | p.rotate270(probability=1) 14 | p.flip_left_right(probability=1) 15 | p.flip_top_bottom(probability=1) 16 | p.rotate(probability=1, max_left_rotation=5, max_right_rotation=5) 17 | p.flip_left_right(probability=1) 18 | p.zoom_random(probability=1, percentage_area=0.8) 19 | p.flip_top_bottom(probability=1) 20 | p.sample(27420) 21 | -------------------------------------------------------------------------------- /data_preprocessing/data_integration.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import cv2 3 | 4 | 5 | csvpath_all = '/aul/homes/final/split/location810/contour_all.csv' 6 | with open(csvpath_all, 'a') as csvfile: 7 | myFields = ['id', 'location','image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] 8 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 9 | writer.writeheader() 10 | csvfile.close() 11 | 12 | csvpath_yes = '/aul/homes/final/split/location810/contour_features.csv' 13 | with open(csvpath_yes, 'a') as csvfile: 14 | myFields = ['id', 'location', 'image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] 15 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 16 | writer.writeheader() 17 | csvfile.close() 18 | 19 | csvpath_no = '/aul/homes/final/split/location810/no_contour_features.csv' 20 | with open(csvpath_no, 'a') as csvfile: 21 | myFields = ['id', 'location','image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] 22 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 23 | writer.writeheader() 24 | csvfile.close() 25 | 26 | for i in range(8,11): 27 | csv_path = '/aul/homes/final_contour/house' + str(i) + '/contour_all.csv' 28 | with 
open(csv_path, newline='') as csv_file: 29 | reader = csv.DictReader(csv_file) 30 | for row in reader: 31 | contour = {} 32 | contour = row 33 | contour['location'] = 'location' + str(i) 34 | with open(csvpath_all, 'a') as csvfile: 35 | writer = csv.writer(csvfile) 36 | writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) 37 | csvfile.close() 38 | # print(contour) 39 | if(contour['label'] == str(1)): 40 | with open(csvpath_yes, 'a') as csvfile: 41 | writer = csv.writer(csvfile) 42 | writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) 43 | csvfile.close() 44 | 45 | if(contour['label'] == str(0)): 46 | with open(csvpath_no, 'a') as csvfile: 47 | writer = csv.writer(csvfile) 48 | writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) 49 | csvfile.close() 50 | 51 | 52 | csv_file.close() 53 | print(csv_path) 54 | 55 | 56 | 57 | 58 | 59 | import csv 60 | import cv2 61 | 62 | 63 | csvpath_train_nopanel = '/aul/homes/final/nosplit/train/train_nopanel.csv' 64 | with open(csvpath_train_nopanel, 'a') as csvfile: 65 | myFields = ['id', 'location','image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] 66 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 67 | writer.writeheader() 68 | csvfile.close() 69 | 70 | csvpath_test_nopanel = '/aul/homes/final/nosplit/test/test_nopanel.csv' 71 | with open(csvpath_test_nopanel , 'a') as csvfile: 72 | myFields = ['id', 'location', 'image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] 73 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 74 | writer.writeheader() 75 | csvfile.close() 76 | 77 | csvpath_validation_nopanel = '/aul/homes/final/nosplit/validation/validation_nopanel.csv' 78 | with open(csvpath_validation_nopanel, 'a') as csvfile: 79 | myFields = ['id', 'location','image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] 80 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 81 | writer.writeheader() 82 | csvfile.close() 83 | 84 | 85 | csv_path = '/aul/homes/final/nosplit/no_contour_features.csv' 86 | i = 0 87 | with open(csv_path, newline='') as csv_file: 88 | reader = csv.DictReader(csv_file) 89 | for row in reader: 90 | contour = {} 91 | contour = row 92 | if ((i %10) <3): 93 | with open(csvpath_test_nopanel, 'a') as csvfile: 94 | writer = csv.writer(csvfile) 95 | writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) 96 | csvfile.close() 97 | 98 | # print(contour) 99 
| elif ((i %10) >7): 100 | with open(csvpath_validation_nopanel, 'a') as csvfile: 101 | writer = csv.writer(csvfile) 102 | writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) 103 | csvfile.close() 104 | 105 | else: 106 | with open(csvpath_train_nopanel, 'a') as csvfile: 107 | writer = csv.writer(csvfile) 108 | writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) 109 | csvfile.close() 110 | i = i + 1 111 | 112 | csv_file.close() 113 | 114 | 115 | -------------------------------------------------------------------------------- /data_preprocessing/feature_extraction.py: -------------------------------------------------------------------------------- 1 | # OpenCV lib 2 | import os 3 | 4 | 5 | import cv2 6 | import glob as gb 7 | import numpy as np 8 | import csv 9 | import math 10 | 11 | def getContourStat(img, contour): 12 | mask = np.zeros(img.shape, dtype="uint8") 13 | cv2.drawContours(mask, [contour], -1, 255, -1) 14 | mean, stddev = cv2.meanStdDev(img, mask=mask) 15 | return mean, stddev 16 | 17 | def cal_roofarea(image): 18 | black = cv2.threshold(image, 0, 255, 0)[1] 19 | _,contours, hierarchy = cv2.findContours(black, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 20 | # cv2.drawContours(img, contours, -1, (255, 0, 0), 2) 21 | area = [cv2.contourArea(c) for c in contours] 22 | roof_index = np.argmax(area) 23 | roof_cnt = contours[roof_index] 24 | # contourArea will return the wrong value if the contours are self-intersections 25 | roof_area = cv2.contourArea(roof_cnt) 26 | #print('roof area = '+ str(roof_area)) 27 | return (roof_area,roof_cnt) 28 | 29 | def pole(img, contour): 30 | ori_img = img.copy() 31 | image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 32 | cont = cal_roofarea(image_grayscale)[1] 33 | cv2.drawContours(ori_img, cont, -1, (255, 0, 0), 3) 34 | #print(len(contour)) 35 | contour_res =[] 36 | back = 1 37 | cnt = contour 38 | leftmost = tuple(cnt[cnt[:, :, 0].argmin()][0]) 39 | rightmost = tuple(cnt[cnt[:, :, 0].argmax()][0]) 40 | topmost = tuple(cnt[cnt[:, :, 1].argmin()][0]) 41 | bottommost = tuple(cnt[cnt[:, :, 1].argmax()][0]) 42 | pole = [leftmost,rightmost,topmost,bottommost] 43 | for point in pole: 44 | # check the distance with contours, biggest contour 45 | # when it is negative, means the point is outside the contours 46 | dist = cv2.pointPolygonTest(cont, point, True) 47 | # print(dist) 48 | if (dist <=0): 49 | back = 0 50 | else: 51 | pass 52 | 53 | return (ori_img,contour,back) 54 | def rotate_rectangle(img_name,img, contour): 55 | 56 | shape= {} 57 | shape['id'] = img_name 58 | # for c in contour: 59 | c = contour 60 | 61 | area = cv2.contourArea(c) 62 | x,y,w,h = cv2.boundingRect(c) 63 | ratiowh = min(float(w/h),float(h/w)) 64 | shape['ratiowh'] = ratiowh 65 | 66 | ratioarea = float(area/(w*h)) 67 | shape['ratioarea'] = ratioarea 68 | 69 | epsilon = 0.01 * cv2.arcLength(c, True) 70 | approx = cv2.approxPolyDP(c, epsilon, True) 71 | 72 | approxlen = len(approx) 73 | shape['approxlen'] = approxlen 74 | 75 | 76 | # the original num set to be -1 to be different no operation 77 | 
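    # Corner-angle features: the approximated polygon is rasterised onto a mask,
    # cv2.goodFeaturesToTrack proposes corner candidates, and for each candidate that
    # lies exactly on the contour the interior angle is estimated with the law of
    # cosines, angle = acos((a^2 + b^2 - c^2) / (2*a*b)), where a and b are the
    # distances to the contour points 5 positions before and after the corner and c
    # is the distance between those two neighbours. num_angle counts all measured
    # corners; num_angle90/80/70 count how many are sharper than 90/80/70 degrees
    # (they start at -1, per the comment above, so -1 means nothing was measured).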
num_angle = 0 78 | num_angle90 = -1 79 | num_angle80 = -1 80 | num_angle70 = -1 81 | 82 | mask = np.zeros(img.shape, np.uint8) 83 | cv2.drawContours(mask, [approx], -1, (255, 255, 255), -1) 84 | cv2.drawContours(img, [approx], -1, (255, 255, 255), 2) 85 | # mask = np.concatenate((mask, mask, mask), axis=-1) 86 | gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) 87 | contour_list = [] 88 | ret, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY) 89 | _,contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 90 | # get the list of contours 91 | for points in contours[0]: 92 | x, y = points.ravel() 93 | contour_list.append([x, y]) 94 | corners = cv2.goodFeaturesToTrack(gray, 50, 0.01, 10) 95 | corners = np.int0(corners) 96 | for i in corners: 97 | x, y = i.ravel() 98 | # decide whether the corner is on the contours 99 | if (cv2.pointPolygonTest(contours[0], (x, y), True) == 0): 100 | center_index = contour_list.index([x, y]) 101 | length = len(contour_list) 102 | # get the point three before, and ignore the end point 103 | a_index = center_index - 5 104 | b_index = center_index + 5 105 | if ((a_index > 0) & (b_index > 0) & (a_index < length)& (b_index < length)): 106 | xa, ya = contour_list[a_index] 107 | xb, yb = contour_list[b_index] 108 | # print(x , y) 109 | # print(xa, ya) 110 | a = math.sqrt((x - xa) * (x - xa) + (y - ya) * (y - ya)) 111 | b = math.sqrt((x - xb) * (x - xb) + (y - yb) * (y - yb)) 112 | c = math.sqrt((xa - xb) * (xa - xb) + (ya - yb) * (ya - yb)) 113 | if ((a > 0) & (b > 0)): 114 | if(((a * a + b * b - c * c) / (2 * a * b))<1) & (((a * a + b * b - c * c) / (2 * a * b) >-1)): 115 | angle = math.degrees(math.acos((a * a + b * b - c * c) / (2 * a * b))) 116 | num_angle =num_angle +1 117 | # print(angle) 118 | if (angle < 90): 119 | num_angle90 = num_angle90 + 1 120 | if (angle < 80): 121 | num_angle80 = num_angle80 + 1 122 | if (angle < 70): 123 | num_angle70 = num_angle70 + 1 124 | cv2.circle(img, (x, y), 5, 255, -1) 125 | 126 | shape['numangle'] = num_angle 127 | shape['numangle90'] = num_angle90 128 | shape['numangle80'] = num_angle80 129 | shape['numangle70'] = num_angle70 130 | 131 | return(shape) 132 | 133 | def main(): 134 | # the file store the contour file 135 | csvpath_all = '/aul/homes/final_contour/house3/contour_all.csv' 136 | with open(csvpath_all, 'a') as csvfile: 137 | myFields = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] 138 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 139 | writer.writeheader() 140 | csvfile.close() 141 | 142 | csvpath_yes = '/aul/homes/final_contour/house3/contour_features.csv' 143 | with open(csvpath_yes, 'a') as csvfile: 144 | myFields = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] 145 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 146 | writer.writeheader() 147 | csvfile.close() 148 | 149 | csvpath_no = '/aul/homes/final_contour/house3/no_contour_features.csv' 150 | with open(csvpath_no, 'a') as csvfile: 151 | myFields = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] 152 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 153 | writer.writeheader() 154 | csvfile.close() 155 | 156 | 157 | img_path = gb.glob('/aul/homes/final_contour/house3/panel/*.png') 158 | npy_path = '/aul/homes/dataset/dataset930/house3/contour/' 159 | for path in 
img_path: 160 | contour = {} 161 | contour_name = path.split("/")[-1] 162 | contour_name = contour_name.split(".")[0] 163 | contour['id'] = contour_name 164 | img_name = contour_name.split("_")[0] 165 | # print(img_name) 166 | c = np.load(npy_path + contour_name + '.npy') 167 | # print(c) 168 | # the file store images 169 | img = cv2.imread('/aul/homes/dataset/dataset930/house3/roof/'+ img_name + '.png') 170 | cv2.drawContours(img, c, -1, (0, 255, 0), 3) 171 | image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 172 | mean = getContourStat(image_grayscale, c)[0] 173 | stddev =getContourStat(image_grayscale, c)[1] 174 | contour['mean'] = mean[0][0] 175 | contour['stddev'] = stddev[0][0] 176 | 177 | contour['image'] = str(img_name) 178 | contour['size'] = cv2.contourArea(c) 179 | # contour['cont'] = c 180 | contour['pole'] = pole(img.copy(), c)[2] 181 | area = cv2.contourArea(c) 182 | perimeter = cv2.arcLength(c, True) 183 | sq = 4 * math.pi * area / (perimeter * perimeter) 184 | contour['square'] = sq 185 | # print(sq) 186 | shape = rotate_rectangle(img_name,img.copy(), c) 187 | contour['ratiowh'] = shape['ratiowh'] 188 | contour['ratioarea'] = shape['ratioarea'] 189 | contour['approxlen'] = shape['approxlen'] 190 | contour['numangle'] = shape['numangle'] 191 | contour['numangle90'] = shape['numangle90'] 192 | contour['numangle70'] = shape['numangle70'] 193 | contour['label'] = str(1) 194 | # the file to store the mean value and stddev 195 | with open(csvpath_all, 'a') as csvfile: 196 | writer = csv.writer(csvfile) 197 | writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) 198 | csvfile.close() 199 | with open(csvpath_yes, 'a') as csvfile: 200 | writer = csv.writer(csvfile) 201 | writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) 202 | csvfile.close() 203 | print('finish') 204 | 205 | 206 | img_path = gb.glob('/aul/homes/final_contour/house3/nopanel/*.png') 207 | npy_path = '/aul/homes/dataset/dataset930/house3/contour/' 208 | for path in img_path: 209 | contour = {} 210 | contour_name = path.split("/")[-1] 211 | contour_name = contour_name.split(".")[0] 212 | contour['id'] = contour_name 213 | img_name = contour_name.split("_")[0] 214 | # print(img_name) 215 | c = np.load(npy_path + contour_name + '.npy') 216 | # print(c) 217 | # the file store images 218 | img = cv2.imread('/aul/homes/dataset/dataset930/house3/roof/'+ img_name + '.png') 219 | cv2.drawContours(img, c, -1, (0, 255, 0), 3) 220 | image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 221 | mean = getContourStat(image_grayscale, c)[0] 222 | stddev =getContourStat(image_grayscale, c)[1] 223 | contour['mean'] = mean[0][0] 224 | contour['stddev'] = stddev[0][0] 225 | 226 | contour['image'] = str(img_name) 227 | contour['size'] = cv2.contourArea(c) 228 | # contour['cont'] = c 229 | contour['pole'] = pole(img.copy(), c)[2] 230 | area = cv2.contourArea(c) 231 | perimeter = cv2.arcLength(c, True) 232 | sq = 4 * math.pi * area / (perimeter * perimeter) 233 | contour['square'] = sq 234 | # print(sq) 235 | shape = rotate_rectangle(img_name,img.copy(), c) 236 | contour['ratiowh'] = 
shape['ratiowh'] 237 | contour['ratioarea'] = shape['ratioarea'] 238 | contour['approxlen'] = shape['approxlen'] 239 | contour['numangle'] = shape['numangle'] 240 | contour['numangle90'] = shape['numangle90'] 241 | contour['numangle70'] = shape['numangle70'] 242 | contour['label'] = str(0) 243 | # the file to store the mean value and stddev 244 | with open(csvpath_all, 'a') as csvfile: 245 | writer = csv.writer(csvfile) 246 | writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) 247 | csvfile.close() 248 | with open(csvpath_no, 'a') as csvfile: 249 | writer = csv.writer(csvfile) 250 | writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) 251 | csvfile.close() 252 | print('finish') 253 | 254 | 255 | 256 | 257 | main() 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | -------------------------------------------------------------------------------- /data_preprocessing/mean_cache.py: -------------------------------------------------------------------------------- 1 | #openCV lib 2 | import os 3 | 4 | 5 | import cv2 6 | import glob as gb 7 | import numpy as np 8 | import csv 9 | import math 10 | 11 | def getContourStat(img, contour): 12 | mask = np.zeros((800,800), dtype="uint8") 13 | cv2.drawContours(mask, [contour], -1, 255, -1) 14 | mean, stddev = cv2.meanStdDev(img, mask=mask) 15 | return mean, stddev 16 | 17 | def main(): 18 | # the file store the contour file 19 | csvpath_all = '/aul/homes/1019/split/feature_all.csv' 20 | with open(csvpath_all, 'a') as csvfile: 21 | myFields = ['id','location','image', 'size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label','vgg_pro','vgg_class'] 22 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 23 | writer.writeheader() 24 | csvfile.close() 25 | # image path 26 | 27 | img_path_panel = gb.glob('/aul/homes/final_contour/house'+ str(i) +'/panel/*.png') 28 | img_path_nopanel = gb.glob('/aul/homes/final_contour/house'+ str(i) +'/nopanel/*.png') 29 | npy_path = '/aul/homes/dataset/dataset930/house'+ str(i) +'/contour/' 30 | csv_path = '/aul/homes/1019/split/feature17.csv' 31 | with open(csv_path, newline='') as csvfile: 32 | reader = csv.DictReader(csvfile) 33 | for row in reader: 34 | contour = row 35 | i = contour['location'][-1] 36 | if (i =='0'): 37 | i = '10' 38 | if (contour['label']=='1'): 39 | path = img_path_panel 40 | if (contour['label']=='0'): 41 | path = img_path_nopanel 42 | img = cv2.imread(path) 43 | c = np.load(npy_path + contour['image'] + '.npy') 44 | image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 45 | mean = getContourStat(image_grayscale, c)[0] 46 | stddev =getContourStat(image_grayscale, c)[1] 47 | contour['mean'] = mean[0][0] 48 | contour['stddev'] = stddev[0][0] 49 | mean_all = getContourStat(img, c)[0] 50 | stddev_all = getContourStat(img, c)[1] 51 | contour['b_mean'] = mean_all[0][0] 52 | contour['g_mean'] = mean_all[1][0] 53 | contour['r_mean'] = mean_all[2][0] 54 | contour['b_stddev'] = stddev_all[0][0] 55 | contour['g_stddev'] = stddev_all[1][0] 56 | 
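            # cv2.meanStdDev returns one row per channel in OpenCV's BGR order,
            # which is why the rows are unpacked into b_/g_/r_ columns here.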
contour['r_stddev'] = stddev_all[2][0] 57 | 58 | with open(csvpath_all, 'a') as csvfile: 59 | writer = csv.writer(csvfile) 60 | writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['b_mean'],contour['g_mean'],contour['r_mean'],contour['b_stddev'],contour['g_stddev'],contour['r_stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label'],contour['vgg_pro'],contour['vgg_class']]) 61 | csvfile.close() 62 | 63 | main() 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /data_preprocessing/mean_cache_nosplit.py: -------------------------------------------------------------------------------- 1 | #openCV lib 2 | import os 3 | 4 | 5 | import cv2 6 | import glob as gb 7 | import numpy as np 8 | import csv 9 | import math 10 | 11 | def getContourStat(img, contour): 12 | mask = np.zeros((800,800), dtype="uint8") 13 | cv2.drawContours(mask, [contour], -1, 255, -1) 14 | mean, stddev = cv2.meanStdDev(img, mask=mask) 15 | return mean, stddev 16 | 17 | def main(): 18 | # the file store the contour file 19 | csvpath_all = '/aul/homes/1019/nosplit/feature_train_all.csv' 20 | with open(csvpath_all, 'a') as csvfile: 21 | myFields = ['id','image', 'size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label','vgg_pro','vgg_class'] 22 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 23 | writer.writeheader() 24 | csvfile.close() 25 | # image path 26 | 27 | csv_path = '/aul/homes/1019/nosplit/feature_train.csv' 28 | with open(csv_path, newline='') as csvfile: 29 | reader = csv.DictReader(csvfile) 30 | for row in reader: 31 | contour = row 32 | img_path_panel = '/aul/homes/final/nosplit/panel/' + contour['id'] +'.png' 33 | img_path_nopanel = '/aul/homes/final/nosplit/nopanel/' + contour['id'] +'.png' 34 | npy_path = '/aul/homes/dataset/dataset930/npy/' 35 | if (contour['label']=='1'): 36 | path = img_path_panel 37 | if (contour['label']=='0'): 38 | path = img_path_nopanel 39 | 40 | img = cv2.imread(path) 41 | c = np.load(npy_path + contour['id'] + '.npy') 42 | image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 43 | mean = getContourStat(image_grayscale, c)[0] 44 | stddev =getContourStat(image_grayscale, c)[1] 45 | contour['mean'] = mean[0][0] 46 | contour['stddev'] = stddev[0][0] 47 | mean_all = getContourStat(img, c)[0] 48 | stddev_all = getContourStat(img, c)[1] 49 | contour['b_mean'] = mean_all[0][0] 50 | contour['g_mean'] = mean_all[1][0] 51 | contour['r_mean'] = mean_all[2][0] 52 | contour['b_stddev'] = stddev_all[0][0] 53 | contour['g_stddev'] = stddev_all[1][0] 54 | contour['r_stddev'] = stddev_all[2][0] 55 | 56 | with open(csvpath_all, 'a') as csvfile: 57 | writer = csv.writer(csvfile) 58 | writer.writerow([contour['id'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['b_mean'],contour['g_mean'],contour['r_mean'],contour['b_stddev'],contour['g_stddev'],contour['r_stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label'],contour['vgg_pro'],contour['vgg_class']]) 59 | csvfile.close() 60 | 61 | main() 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 
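# --- Illustrative sketch (not part of the original pipeline) -----------------
# getContourStat() above computes colour statistics only over the pixels that lie
# inside a contour: the contour is rasterised into a filled 800x800 mask (the
# scripts assume 800x800 rooftop crops) and that mask is handed to cv2.meanStdDev.
# The hypothetical helper below is a minimal, self-contained check of that
# behaviour on a synthetic image; it is defined here for reference but never called.
def _demo_masked_mean_stddev():
    demo = np.zeros((800, 800, 3), dtype="uint8")
    demo[100:200, 100:200] = (10, 120, 250)           # a uniformly coloured "panel"
    square = np.array([[[100, 100]], [[199, 100]],
                       [[199, 199]], [[100, 199]]], dtype=np.int32)
    mask = np.zeros((800, 800), dtype="uint8")
    cv2.drawContours(mask, [square], -1, 255, -1)      # filled contour mask
    mean, stddev = cv2.meanStdDev(demo, mask=mask)     # stats over masked pixels only
    return mean.ravel(), stddev.ravel()                # approx. (10, 120, 250) and ~0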
71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /data_preprocessing/mean_cache_split.py: -------------------------------------------------------------------------------- 1 | # OpenCV lib 2 | import os 3 | 4 | 5 | import cv2 6 | import glob as gb 7 | import numpy as np 8 | import csv 9 | import math 10 | 11 | def getContourStat(img, contour): 12 | mask = np.zeros((800,800), dtype="uint8") 13 | cv2.drawContours(mask, [contour], -1, 255, -1) 14 | mean, stddev = cv2.meanStdDev(img, mask=mask) 15 | return mean, stddev 16 | 17 | def main(): 18 | # the file store the contour file 19 | csvpath_all = './feature_train_all.csv' 20 | with open(csvpath_all, 'a') as csvfile: 21 | myFields = ['id','location','image', 'size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label','vgg_pro','vgg_class'] 22 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 23 | writer.writeheader() 24 | csvfile.close() 25 | # image path 26 | 27 | csv_path = './feature_train.csv' 28 | with open(csv_path, newline='') as csvfile: 29 | reader = csv.DictReader(csvfile) 30 | for row in reader: 31 | contour = row 32 | i = contour['location'][-1] 33 | if (i =='0'): 34 | i = '10' 35 | img_path_panel = './final_contour/house'+ str(i) +'/panel/' + contour['id'] +'.png' 36 | img_path_nopanel = './final_contour/house'+ str(i) +'/nopanel/' + contour['id'] +'.png' 37 | npy_path = './dataset930/house'+ str(i) +'/contour/' 38 | if (contour['label']=='1'): 39 | path = img_path_panel 40 | if (contour['label']=='0'): 41 | path = img_path_nopanel 42 | 43 | img = cv2.imread(path) 44 | c = np.load(npy_path + contour['id'] + '.npy') 45 | image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 46 | mean = getContourStat(image_grayscale, c)[0] 47 | stddev =getContourStat(image_grayscale, c)[1] 48 | contour['mean'] = mean[0][0] 49 | contour['stddev'] = stddev[0][0] 50 | mean_all = getContourStat(img, c)[0] 51 | stddev_all = getContourStat(img, c)[1] 52 | contour['b_mean'] = mean_all[0][0] 53 | contour['g_mean'] = mean_all[1][0] 54 | contour['r_mean'] = mean_all[2][0] 55 | contour['b_stddev'] = stddev_all[0][0] 56 | contour['g_stddev'] = stddev_all[1][0] 57 | contour['r_stddev'] = stddev_all[2][0] 58 | 59 | with open(csvpath_all, 'a') as csvfile: 60 | writer = csv.writer(csvfile) 61 | writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['b_mean'],contour['g_mean'],contour['r_mean'],contour['b_stddev'],contour['g_stddev'],contour['r_stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label'],contour['vgg_pro'],contour['vgg_class']]) 62 | csvfile.close() 63 | 64 | main() 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /data_preprocessing/read_calculatemetric.py: -------------------------------------------------------------------------------- 1 | import math 2 | import csv 3 | 4 | 5 | def metric(des,panel_panel, panel_nopanel,nopanel_panel,nopanel_nopanel): 6 | metric = {} 7 | TP = int(panel_panel) 8 | FN = int(panel_nopanel) 9 | FP = int(nopanel_panel) 10 | TN = int(nopanel_nopanel) 11 | ACCURACY = float((TP + TN)/(TP + FP + FN + TN)) 12 | PRECISION = float(TP/(TP + FP)) 13 | RECALL = float(TP/(TP + FN)) 14 | 
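    # F1 is the harmonic mean of precision and recall:
    #   F1 = 2 * PRECISION * RECALL / (PRECISION + RECALL)
    # MCC is the Matthews correlation coefficient:
    #   MCC = (TP*TN - FP*FN) / sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
    # Both (like PRECISION and RECALL above) divide by zero when a row or column of
    # the confusion matrix is empty, so callers should guard against that case.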
F1 = float(2*PRECISION*RECALL/(PRECISION + RECALL)) 15 | MCC = float((TP * TN - FP * FN)/ math.sqrt((TP + FP) * (FN + TN) * (FP + TN) * (TP + FN))) 16 | SPECIFICITY = float(TN/(TN + FP)) 17 | metric['TP'] = float(TP/(TP + FN)) 18 | metric['FN'] = float(FN /(TP + FN)) 19 | metric['TN'] = float(TN /(TN + FP)) 20 | metric['FP'] =float(FP /(TN + FP)) 21 | metric['ACCURACY'] = ACCURACY 22 | metric['PRECISION'] =PRECISION 23 | metric['RECALL']= RECALL 24 | metric['F1'] = F1 25 | metric['MCC'] = MCC 26 | metric['SPECIFICITY'] = SPECIFICITY 27 | metric['description'] = des 28 | print(metric) 29 | csvpath = './result1.csv' 30 | with open(csvpath, 'a') as csvfile: 31 | writer = csv.writer(csvfile) 32 | writer.writerow([metric['description'],metric['TP'],metric['FN'],metric['TN'],metric['FP'],metric['ACCURACY'],metric['MCC'],metric['F1'],metric['SPECIFICITY'],metric['PRECISION'],metric['RECALL']]) 33 | csvfile.close() 34 | 35 | def main(): 36 | with open('./result.csv', newline='') as csvfile: 37 | reader = csv.DictReader(csvfile) 38 | for row in reader: 39 | metric(row['des'],row['TP'],row['FN'],row['FP'],row['TN']) 40 | csvfile.close() 41 | main() 42 | -------------------------------------------------------------------------------- /data_preprocessing/under_smaple.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import glob as gb 4 | num = 0 5 | img_path = gb.glob("./*.png") 6 | for path in img_path: 7 | img_name = path.split("/")[-1] 8 | img = cv2.imread(path) 9 | if ((num % 5) == 0): 10 | cv2.imwrite(os.path.join('./' + img_name),img) 11 | num = num + 1 12 | -------------------------------------------------------------------------------- /evaluation/generate_evaluation_confusionmatrics.py: -------------------------------------------------------------------------------- 1 | import math 2 | import csv 3 | 4 | 5 | def metric(des,panel_panel, panel_nopanel,nopanel_panel,nopanel_nopanel): 6 | metric = {} 7 | TP = int(panel_panel) 8 | FN = int(panel_nopanel) 9 | FP = int(nopanel_panel) 10 | TN = int(nopanel_nopanel) 11 | ACCURACY = float((TP + TN)/(TP + FP + FN + TN)) 12 | PRECISION = float(TP/(TP + FP)) 13 | RECALL = float(TP/(TP + FN)) 14 | F1 = float(2*PRECISION*RECALL/(PRECISION + RECALL)) 15 | MCC = float((TP * TN - FP * FN)/ math.sqrt((TP + FP) * (FN + TN) * (FP + TN) * (TP + FN))) 16 | SPECIFICITY = float(TN/(TN + FP)) 17 | metric['TP'] = float(TP/(TP + FN)) 18 | metric['FN'] = float(FN /(TP + FN)) 19 | metric['TN'] = float(TN /(TN + FP)) 20 | metric['FP'] =float(FP /(TN + FP)) 21 | metric['ACCURACY'] = ACCURACY 22 | metric['PRECISION'] =PRECISION 23 | metric['RECALL']= RECALL 24 | metric['F1'] = F1 25 | metric['MCC'] = MCC 26 | metric['SPECIFICITY'] = SPECIFICITY 27 | metric['description'] = des 28 | print(metric) 29 | csvpath = './resultall.csv' 30 | with open(csvpath, 'a') as csvfile: 31 | writer = csv.writer(csvfile) 32 | writer.writerow([metric['description'],metric['TP'],metric['FN'],metric['TN'],metric['FP'],metric['ACCURACY'],metric['MCC'],metric['F1'],metric['SPECIFICITY'],metric['PRECISION'],metric['RECALL']]) 33 | csvfile.close() 34 | def main(): 35 | with open('./result.csv', newline='') as csvfile: 36 | reader = csv.DictReader(csvfile) 37 | for row in reader: 38 | metric(row['des'],row['TP'],row['FN'],row['FP'],row['TN']) 39 | csvfile.close() 40 | main() -------------------------------------------------------------------------------- /evaluation/iou_calculation.py: 
-------------------------------------------------------------------------------- 1 | import csv 2 | import math 3 | num = {} 4 | for i in range(0,11): 5 | num[i] = 0 6 | number = 0 7 | csv_path = './rooftop_iou.csv' 8 | with open(csv_path, newline='') as csvfile: 9 | reader = csv.DictReader(csvfile) 10 | for row in reader: 11 | iou = float(row['iou']) 12 | for i in range(0,11): 13 | if (iou > i*0.1): 14 | num[i] = num[i] +1 15 | number = number + 1 16 | csvfile.close() 17 | print(num) 18 | print(number) 19 | -------------------------------------------------------------------------------- /evaluation/iou_score_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import sys 4 | import csv 5 | 6 | import os.path as ospath 7 | 8 | def hotkey(): 9 | global outline_list 10 | global current_outline 11 | 12 | KEY_UNDO = ord('u') 13 | KEY_CLEAN = ord('c') 14 | KEY_NEXT = ord('n') 15 | KEY_SAVE = ord('s') 16 | KEY_QUIT = ord('q') 17 | 18 | key = cv2.waitKey(0) 19 | if key == KEY_QUIT: 20 | print('*** Quit') 21 | exit() 22 | else: 23 | print('*** Next Image') 24 | cv2.destroyAllWindows() 25 | 26 | def main(argv): 27 | # print ('Number of arguments:', len(argv), 'arguments.') 28 | # print ('Argument List:', str(argv)) 29 | contours_dir = "./data/panel/" 30 | rooftop_img_dir = "./panel/" 31 | rooftop_csv_path = './data/rooftop_solar_array_outlines_new.csv' 32 | rooftop_iou_csv_path = './rooftop_iou.csv' 33 | with open(rooftop_iou_csv_path, 'a') as csvfile: 34 | myFields = ['id', 'location_id', 'label', 'solar_list', 'contour_num','iou'] 35 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 36 | writer.writeheader() 37 | with open(rooftop_csv_path, newline='') as rooftop_csv_file: 38 | reader = csv.DictReader(rooftop_csv_file) 39 | for row in reader: 40 | roof = {} 41 | roof = row 42 | contour_mask = eval(row['contour_num']) 43 | # print(contour_mask) 44 | contour_img = np.zeros((800,800,3), np.uint8) 45 | for contour in contour_mask: 46 | contour_path = contours_dir + contour + '.png' 47 | # print(contour_path ) 48 | img = cv2.imread(contour_path) 49 | # cv2.imshow('img', img) 50 | # cv2.waitKey(0) 51 | excluded_color = [0, 0, 0] 52 | indices_list = np.where(np.all(img != excluded_color, axis=-1)) 53 | contour_img[indices_list] = [255, 255, 255] 54 | # cv2.imshow('img',contour_img) 55 | # cv2.waitKey(0) 56 | 57 | solar_mask = np.zeros((800,800,3), np.uint8) 58 | outline_list = eval(row['solar_list']) 59 | for outline in outline_list: 60 | # print(outline) 61 | pts = np.asarray(outline) 62 | cv2.fillPoly(solar_mask, np.int_([pts]), (255, 255, 255)) 63 | # cv2.polylines(solar_mask, [pts], True, (0, 0, 255), 2) 64 | # cv2.imshow('img', solar_mask) 65 | # cv2.waitKey(0) 66 | # cv2.fillPoly(img_to_show, np.int_([pts]), (198, 133, 61)) 67 | # cv2.fillPoly(img_to_show, np.int_([pts]), (255, 255, 255)) 68 | # 69 | predict_gray_mask = cv2.cvtColor(contour_img, cv2.COLOR_BGR2GRAY) 70 | label_gray_mask = cv2.cvtColor(solar_mask, cv2.COLOR_BGR2GRAY) 71 | # 72 | # # rooftop_mask_size = cv2.countNonZero(rooftop_gray_mask) 73 | # # solar_mask_size = cv2.countNonZero(solar_gray_mask) 74 | # # size_ration = solar_mask_size / rooftop_mask_size 75 | # # print(rooftop_mask_size) 76 | # # print(solar_mask_size) 77 | # # print(size_ration) 78 | # 79 | # # IOU Score 80 | intersection = np.logical_and(predict_gray_mask, label_gray_mask) 81 | union = np.logical_or(predict_gray_mask, label_gray_mask) 82 | iou_score = np.sum(intersection) / 
np.sum(union) 83 | # print(iou_score) 84 | # 85 | # print(iou_score) 86 | # 87 | # # print(size_ration/iou_score) 88 | 89 | # cv2.imshow(row['id'], img_to_show) 90 | 91 | # hotkey() 92 | roof['iou'] = iou_score 93 | with open(rooftop_iou_csv_path, 'a') as csvfile_new: 94 | writer = csv.writer(csvfile_new) 95 | writer.writerow([roof['id'], roof['location_id'], roof['label'], 96 | roof['solar_list'], roof['contour_num'],roof['iou']]) 97 | csvfile_new.close() 98 | 99 | rooftop_csv_file.close() 100 | 101 | if __name__ == "__main__": 102 | main(sys.argv[1:]) -------------------------------------------------------------------------------- /evaluation/orientation_calculate.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import math 3 | csv_path = './orientation_positive.csv' 4 | num_all = 0 5 | num_5 = 0 6 | num_10 = 0 7 | num_15 = 0 8 | num_20 = 0 9 | with open(csv_path, newline='') as csvfile: 10 | reader = csv.DictReader(csvfile) 11 | for row in reader: 12 | contour_orientation =float(row['contour']) 13 | roof_orientation = float(row['roof']) 14 | contour_orientation_45differ = math.fabs(math.fabs(contour_orientation)- 45) 15 | roof_orientation_45differ = math.fabs(math.fabs(roof_orientation)- 45) 16 | differ = math.fabs(contour_orientation_45differ - roof_orientation_45differ) 17 | if(differ < 5): 18 | num_5 = num_5 + 1 19 | if (differ < 10): 20 | num_10 = num_10 + 1 21 | if (differ < 15): 22 | num_15 = num_15 + 1 23 | if (differ < 20): 24 | num_20 = num_20 + 1 25 | num_all = num_all + 1 26 | csvfile.close() 27 | percent_5 = num_5 /num_all 28 | percent_10 = num_10 /num_all 29 | percent_15 = num_15 /num_all 30 | percent_20 = num_20 /num_all 31 | 32 | print(num_all ,num_5,num_10,num_15,num_20) 33 | print(percent_5,percent_10,percent_15,percent_20) -------------------------------------------------------------------------------- /evaluation/postive_contour_generate.py: -------------------------------------------------------------------------------- 1 | 2 | import csv 3 | csv_path ='./feature_test_all_vgg_svm_linear.csv' 4 | csv_path_new = './contour_all_positive.csv' 5 | with open(csv_path_new, 'a') as csvfile: 6 | myFields = ['id', 'location','image', 'label','predict'] 7 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 8 | writer.writeheader() 9 | csvfile.close() 10 | with open(csv_path, newline='') as csvfile: 11 | reader = csv.DictReader(csvfile) 12 | for row in reader: 13 | contour = row 14 | if(contour['linear_nosplit_class']== '1'): 15 | with open(csv_path_new , 'a') as csvfile_new: 16 | writer = csv.writer(csvfile_new) 17 | writer.writerow([contour['id'], contour['location'], contour['image'],contour['label'], contour['linear_nosplit_class']]) 18 | csvfile_new.close() 19 | csvfile.close() 20 | -------------------------------------------------------------------------------- /evaluation/roof_contour_match.py: -------------------------------------------------------------------------------- 1 | import csv 2 | csv_path = './rooftop_solar_array_outlines.csv' 3 | csv_path_new = './rooftop_solar_array_outlines_new.csv' 4 | csv_path_contour = './contour_all_positive.csv' 5 | with open(csv_path_new, 'a') as csvfile: 6 | myFields = ['id', 'location','location_id','label','solar_list','contour_num'] 7 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 8 | writer.writeheader() 9 | with open(csv_path, newline='') as csvfile: 10 | reader = csv.DictReader(csvfile) 11 | for row in reader: 12 | contour = row 13 | img_name = contour['id'] 
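            # For each labelled rooftop image, scan the positive-contour CSV and collect
            # the ids of every contour the classifier predicted as a solar panel on that
            # image; the resulting 'contour_num' list lets iou_score_generator.py union
            # those contour masks and compare them against the hand-drawn solar outlines.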
14 | contour_num = [] 15 | with open(csv_path_contour, newline='') as csv_file: 16 | reader = csv.DictReader(csv_file) 17 | for row in reader: 18 | if(row['image'] == img_name): 19 | if (row['id'] not in contour_num): 20 | contour_num.append(row['id']) 21 | else: 22 | pass 23 | print(contour_num) 24 | csv_file.close() 25 | contour['contour_num'] = contour_num 26 | with open(csv_path_new, 'a') as csvfile_new: 27 | writer = csv.writer(csvfile_new) 28 | writer.writerow([contour['id'], contour['location'], contour['location_id'], contour['label'], 29 | contour['solar_list'],contour['contour_num']]) 30 | csvfile_new.close() 31 | csvfile.close() 32 | 33 | 34 | -------------------------------------------------------------------------------- /evaluation/solar_array_orientation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from skimage.segmentation import slic 4 | from skimage import color 5 | from skimage import data 6 | from skimage import io 7 | # Traverse files 8 | import glob as gb 9 | # Math lib 10 | import numpy as np 11 | import time 12 | import matplotlib.pyplot as plt 13 | import matplotlib.gridspec as gridspec 14 | import math 15 | import csv 16 | import os.path as path 17 | 18 | from matplotlib.pyplot import imshow 19 | import matplotlib.pyplot as plt 20 | import matplotlib.image as mpimg 21 | def cal_roofarea(image): 22 | black = cv2.threshold(image, 0, 255, 0)[1] 23 | # cv2.imshow('img', black) 24 | # cv2.waitKey(0) 25 | contours, hierarchy = cv2.findContours(black, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 26 | # cv2.drawContours(img, contours, -1, (255, 0, 0), 2) 27 | area = [cv2.contourArea(c) for c in contours] 28 | roof_index = np.argmax(area) 29 | roof_cnt = contours[roof_index] 30 | # contourArea will return the wrong value if the contours are self-intersections 31 | roof_area = cv2.contourArea(roof_cnt) 32 | #print('roof area = '+ str(roof_area)) 33 | return (roof_area,roof_cnt) 34 | 35 | 36 | 37 | 38 | img_path = './panel/' 39 | contours_path = './projects/data/panel/' 40 | csv_path = './vggsvmlogicalregression2features.csv' 41 | with open('./data/orientation_positive.csv', 'a') as csvfile: 42 | myFields = ['id', 'image','contour','roof'] 43 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 44 | writer.writeheader() 45 | csvfile.close() 46 | # num_all = 0 47 | # num_5 =0 48 | # num_10 =0 49 | # num_15 = 0 50 | with open(csv_path, newline='') as csvfile: 51 | reader = csv.DictReader(csvfile) 52 | for row in reader: 53 | orientation = {} 54 | if(row['label'] == '1' and row['vggsvmlogicalregression2features']=='1'): 55 | orientation['id'] =row['id'] 56 | orientation['image'] = row['image'] 57 | img_name = row['image'] 58 | contour_name = row['id'] 59 | image_path = img_path + img_name + '.png' 60 | contour_path = img_path + contour_name + '.png' 61 | if path.exists(image_path): 62 | if path.exists(contour_path ): 63 | img_roof = cv2.imread(image_path) 64 | img_contour = cv2.imread(contour_path) 65 | # cal_roofarea(img) 66 | img_contour_grayscale = cv2.cvtColor(img_contour, cv2.COLOR_BGR2GRAY) 67 | cont_contour = cal_roofarea(img_contour_grayscale)[1] 68 | cv2.drawContours(img_contour, cont_contour, -1, (0, 0, 255), -1) 69 | rect_contour = cv2.minAreaRect(cont_contour) 70 | orientation['contour'] = rect_contour[2] 71 | # print(rect_contour[2]) 72 | # box_contour = cv2.boxPoints(rect_contour) 73 | # box = np.int0(box) 74 | # print(box) 75 | # cv2.drawContours(img_contour, [box], 0, (255, 0, 0), 1) 76 | 
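                    # Repeat the same steps for the whole rooftop: take its largest outer
                    # contour and use the rotation angle reported by cv2.minAreaRect (the
                    # minimum-area bounding rectangle) as the rooftop's orientation, which
                    # orientation_calculate.py later compares with the contour's angle.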
img_roof_grayscale = cv2.cvtColor(img_roof, cv2.COLOR_BGR2GRAY) 77 | cont_roof = cal_roofarea(img_roof_grayscale )[1] 78 | # cv2.drawContours(img , cont, -1, (0, 0, 255), -1) 79 | rect_roof = cv2.minAreaRect(cont_roof) 80 | orientation['roof'] = rect_roof[2] 81 | # print(rect[2]) 82 | # box = cv2.boxPoints(rect) 83 | # box = np.int0(box) 84 | # # print(box) 85 | # cv2.drawContours(img, [box], 0, (255, 0, 0), 1) 86 | # 87 | # x, y, w, h = cv2.boundingRect(cont) 88 | # cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2) 89 | # print(x,y,w,h) 90 | # print(cal_roofarea(cont)[0]) 91 | print(orientation) 92 | # cv2.imshow('img', img_contour) 93 | # cv2.waitKey(0) 94 | with open('./data/orientation_positive.csv', 'a') as csvfile: 95 | writer = csv.writer(csvfile) 96 | writer.writerow([orientation['id'], orientation['image'], orientation['contour'],orientation['roof']]) 97 | csvfile.close() 98 | 99 | 100 | csvfile.close() -------------------------------------------------------------------------------- /image/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyber-physical-systems/SolarFinder/69e833f0e03093a3151ff718f5a313ffe2ed8944/image/pipeline.png -------------------------------------------------------------------------------- /models/PCA/components.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.decomposition import PCA 3 | from sklearn.preprocessing import MinMaxScaler 4 | 5 | filepath = './vgg_predict.csv' #your path here 6 | data = np.genfromtxt(filepath, delimiter=',', dtype='float64') 7 | 8 | scaler = MinMaxScaler(feature_range=[0, 1]) 9 | data_rescaled = scaler.fit_transform(data[1:, 3:13]) 10 | #Fitting the PCA algorithm with our Data 11 | pca = PCA().fit(data_rescaled) 12 | #Plotting the Cumulative Summation of the Explained Variance 13 | plt.figure() 14 | plt.plot(np.cumsum(pca.explained_variance_ratio_)) 15 | plt.xlabel('Number of Components') 16 | plt.ylabel('Variance (%)') #for each component 17 | plt.title('Pulsar Dataset Explained Variance') 18 | plt.savefig('pca.png') 19 | plt.show() 20 | 21 | -------------------------------------------------------------------------------- /models/PCA/pca.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.preprocessing import StandardScaler 5 | 6 | import numpy as np 7 | from sklearn.metrics import classification_report, confusion_matrix 8 | from sklearn.model_selection import train_test_split 9 | col_names = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label'] 10 | # load dataset 11 | data = pd.read_csv("./vgg_predict.csv", header=None, names=col_names) 12 | data = data.dropna() 13 | # feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] 14 | feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 15 | # feature_cols = ['pole','prediction'] 16 | X = data[feature_cols] 17 | y = data.label 18 | scaler = StandardScaler() 19 | X = scaler.fit_transform(X)# Features 20 | from sklearn.decomposition import PCA 21 | 22 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20) 23 | pca = PCA(n_components=6) 24 | X_train 
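# Added note (not in the original script): the scaler above is fit on the full
# feature matrix before train_test_split, so the held-out rows influence the
# scaling; fitting the scaler (and PCA) on X_train only and calling transform()
# on X_test is the more conventional split. PCA keeps 6 components out of the
# 9 physical features here. In components.py above, plt is used for the
# cumulative explained-variance plot, but "import matplotlib.pyplot as plt"
# appears to be missing.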
= pca.fit_transform(X_train) 25 | X_test = pca.transform(X_test) 26 | 27 | 28 | from sklearn.svm import SVC 29 | svclassifier = SVC(kernel='poly',degree = 7,class_weight='balanced', random_state=0) 30 | svclassifier.fit(X_train, y_train) 31 | y_pred = svclassifier.predict(X_test) 32 | from sklearn.metrics import classification_report, confusion_matrix 33 | print(confusion_matrix(y_test,y_pred)) 34 | print(classification_report(y_test,y_pred)) 35 | 36 | -------------------------------------------------------------------------------- /models/hybrid/hybrid.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./svmrbftrainprobility.csv") 17 | data = data.dropna() 18 | feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | # from sklearn.model_selection import train_test_split 27 | # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 28 | X_train = X 29 | y_train = y 30 | 31 | from sklearn.svm import SVC 32 | svclassifier = SVC(kernel='rbf',class_weight='balanced') 33 | model = svclassifier.fit(X_train, y_train) 34 | 35 | 36 | # instantiate the model (using the default parameters) 37 | 38 | # fit the model with data 39 | model.fit(X_train, y_train) 40 | from sklearn.externals import joblib 41 | from joblib import dump, load 42 | dump(model, 'svmrbfhybrid.joblib') 43 | # model = load('svmrbfhybrid.joblib') 44 | 45 | from sklearn import metrics 46 | 47 | 48 | 49 | 50 | datatest = pd.read_csv("./svmrbftestpro.csv") 51 | datatest = datatest.dropna() 52 | feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro'] 53 | Xtest = datatest[feature_cols] 54 | scaler = StandardScaler() 55 | Xtest = scaler.fit_transform(Xtest)# Features 56 | ytest = datatest.label # Target variable 57 | 58 | y_predict= model.predict(Xtest) 59 | 60 | 61 | df = pd.DataFrame(datatest) 62 | df.insert(25, "hybrid", y_predict, True) 63 | 64 | export_csv = df.to_csv ('./svmrbftestprohybrid.csv', index = None) 65 | print(confusion_matrix(ytest, y_predict)) 66 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 67 | print(tn,fp,fn,tp) 68 | with open('./result.csv', 'a') as csvfile: 69 | writer = csv.writer(csvfile) 70 | writer.writerow(['hybrid',tn,fp,fn,tp]) 71 | csvfile.close() 72 | time = time.time() - start_time 73 | with open('./time.csv', 'a') as csvfile: 74 | writer = csv.writer(csvfile) 75 | writer.writerow(['hybrid',time]) 76 | csvfile.close() 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /models/hybrid/linear_model/linearmodel.py: -------------------------------------------------------------------------------- 1 | mport pandas as pd 2 | import numpy as np 3 | from sklearn.linear_model import LogisticRegression 4 | import matplotlib.pyplot as plt 5 | from sklearn.preprocessing import StandardScaler 6 | 7 | from sklearn.metrics import classification_report, 
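# Added note (not in the original script): the first line of this file reads
# "mport pandas as pd" and is presumably a truncated "import pandas as pd".
# In hybrid.py above, "from sklearn.externals import joblib" was removed from
# recent scikit-learn releases; the plain "from joblib import dump, load" on
# the following line is sufficient on its own.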
confusion_matrix 8 | col_names = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label'] 9 | # load dataset 10 | data = pd.read_csv("./train/vgg_predict.csv", header=None, names=col_names) 11 | data = data.dropna() 12 | # feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] 13 | feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction'] 14 | # feature_cols = ['pole','prediction'] 15 | X = data[feature_cols] 16 | 17 | scaler = StandardScaler() 18 | X = scaler.fit_transform(X)# Features 19 | 20 | y = data.label # Target variable 21 | 22 | X_train = X 23 | y_train = y 24 | from sklearn import linear_model 25 | 26 | from sklearn.linear_model import LogisticRegression 27 | from sklearn import metrics 28 | from sklearn.linear_model import LogisticRegressionCV 29 | from sklearn.linear_model import RidgeClassifier 30 | from sklearn.linear_model import RidgeClassifierCV 31 | from sklearn.linear_model import PassiveAggressiveClassifier 32 | from sklearn.datasets import make_classification 33 | X, y = make_classification(n_features=4, random_state=0) 34 | model =PassiveAggressiveClassifier(max_iter=1000, random_state=0,tol=1e-3,class_weight = 'balanced') 35 | 36 | 37 | # fit the model with data 38 | model.fit(X_train, y_train) 39 | 40 | col_names = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label','lrpredict','svmpredict'] 41 | # load dataset 42 | data = pd.read_csv("./vgg_predict.csv", header=None, names=col_names) 43 | 44 | data = data.dropna() 45 | # feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] 46 | feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction'] 47 | 48 | X1 = data[feature_cols] 49 | 50 | scaler = StandardScaler() 51 | X1 = scaler.fit_transform(X1)# Features 52 | 53 | y1 = data.label # Target variable 54 | 55 | 56 | y_pred1 = model.predict(X1) 57 | 58 | 59 | 60 | print(confusion_matrix(y1,y_pred1 )) 61 | print(classification_report(y1,y_pred1 )) 62 | 63 | -------------------------------------------------------------------------------- /models/hybrid/union_differentweights.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import pandas 11 | import pandas as pd 12 | import pickle 13 | from sklearn.linear_model import LogisticRegression 14 | from sklearn import metrics 15 | from sklearn import datasets 16 | from sklearn.preprocessing import StandardScaler 17 | import numpy as np 18 | from sklearn.metrics import classification_report, confusion_matrix 19 | import csv 20 | dataset1 = pd.read_csv("./non_split_test_result.csv") 21 | dataset1 = dataset1.dropna() 22 | df = pd.DataFrame(dataset1) 23 | 24 | # def f(x,y): 25 | # # print(x,y) 26 | # return round(0.5*x + 0.5*y) 27 | 28 | ytest1 = dataset1.label 29 | 30 | 31 | y_predict1=dataset1.hard_pred_label 32 | 
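# Added note (not in the original script): the confusion_matrix call below is
# unpacked with labels=[0, 1], so the order is tn, fp, fn, tp. From these
# counts the Matthews correlation coefficient can be recovered as
#   MCC = (tp*tn - fp*fn) / sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)),
# which is what sklearn.metrics.matthews_corrcoef computes in the other
# evaluation scripts in this repository.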
print(confusion_matrix(ytest1, y_predict1)) 33 | tn, fp, fn, tp = confusion_matrix(ytest1, y_predict1, labels=[0,1]).ravel() 34 | print(tn,fp,fn,tp) 35 | 36 | 37 | -------------------------------------------------------------------------------- /models/logical_regression/logical_model_test.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | col_names = ['id','image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label'] 10 | # load dataset 11 | data = pd.read_csv("./final/nosplit/test/vgg_predict.csv", header=None, names=col_names) 12 | data = data.dropna() 13 | feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction'] 14 | X = data[feature_cols] 15 | 16 | scaler = StandardScaler() 17 | testX = scaler.fit_transform(X)# Features 18 | 19 | testy = data.label # Target variable 20 | 21 | 22 | filename ='./' + 'RLmodel.sav' 23 | # pickle.dump(model, open(filename, 'wb')) 24 | 25 | model = pickle.load(open(filename, 'rb')) 26 | lr_probs = model.predict_proba(testX) 27 | # keep probabilities for the positive outcome only 28 | lr_probs = lr_probs[:, 1] 29 | # calculate scores 30 | ns_auc = roc_auc_score(testy, ns_probs) 31 | lr_auc = roc_auc_score(testy, lr_probs) 32 | # summarize scores 33 | print('No Skill: ROC AUC=%.3f' % (ns_auc)) 34 | print('Logistic: ROC AUC=%.3f' % (lr_auc)) 35 | # calculate roc curves 36 | ns_fpr, ns_tpr, _ = roc_curve(testy, ns_probs) 37 | lr_fpr, lr_tpr, _ = roc_curve(testy, lr_probs) 38 | # plot the roc curve for the model 39 | pyplot.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill') 40 | pyplot.plot(lr_fpr, lr_tpr, marker='.', label='Logistic') 41 | # axis labels 42 | pyplot.xlabel('False Positive Rate') 43 | pyplot.ylabel('True Positive Rate') 44 | # show the legend 45 | pyplot.legend() 46 | # show the plot 47 | pyplot.show() 48 | 49 | 50 | -------------------------------------------------------------------------------- /models/logical_regression/logical_model_train.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | col_names = ['id', 'location', 'image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction','prediction_class','label'] 10 | # load dataset 11 | data = pd.read_csv("./location810/lr.csv", header=None, names=col_names) 12 | data = data.dropna() 13 | feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] 14 | X = data[feature_cols] 15 | 16 | scaler = StandardScaler() 17 | X = scaler.fit_transform(X)# Features 18 | 19 | y = data.label # Target variable 20 | 21 | from sklearn.model_selection import train_test_split 22 | X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 23 | 24 | from sklearn.linear_model import LogisticRegression 25 | from sklearn import metrics 26 | 27 | # 
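# Added note (not in the original script): class_weight='balanced' below
# reweights each class inversely to its frequency, which matters when negative
# contours heavily outnumber positive ones. Separately, logical_model_test.py
# above calls roc_auc_score/roc_curve/pyplot and refers to ns_probs without
# importing or defining them; the likely intent is a no-skill baseline such as
# ns_probs = [0 for _ in range(len(testy))] together with
# "from sklearn.metrics import roc_auc_score, roc_curve" and
# "from matplotlib import pyplot".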
instantiate the model (using the default parameters) 28 | model = LogisticRegression(class_weight = 'balanced') 29 | 30 | # fit the model with data 31 | model.fit(X_train, y_train) 32 | print(model.coef_ ) 33 | print(model.intercept_ ) 34 | filename = 'RLmodel.sav' 35 | pickle.dump(model, open(filename, 'wb')) 36 | 37 | loaded_model = pickle.load(open(filename, 'rb')) 38 | result = loaded_model.score(X_test, y_test) 39 | print(result) 40 | y_predict= model.predict(X_test) 41 | print("Y predict/hat ", y_predict) 42 | print(metrics.confusion_matrix(y_test, y_predict)) 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | import pandas 53 | import pandas as pd 54 | import pickle 55 | from sklearn.linear_model import LogisticRegression 56 | from sklearn import metrics 57 | from sklearn import datasets 58 | from sklearn.preprocessing import StandardScaler 59 | import numpy as np 60 | col_names = ['id','image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label'] 61 | # load dataset 62 | data = pd.read_csv("./vgg_predict.csv", header=None, names=col_names) 63 | data = data.dropna() 64 | feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction'] 65 | X = data[feature_cols] 66 | 67 | scaler = StandardScaler() 68 | X = scaler.fit_transform(X)# Features 69 | 70 | y = data.label # Target variable 71 | 72 | 73 | filename ='./nosplit/' + 'RLmodel.sav' 74 | # pickle.dump(model, open(filename, 'wb')) 75 | 76 | loaded_model = pickle.load(open(filename, 'rb')) 77 | result = loaded_model.score(X, y) 78 | print(result) 79 | y_predict= loaded_model.predict(X) 80 | print("Y predict/hat ", y_predict) 81 | print(metrics.confusion_matrix(y, y_predict)) 82 | 83 | y_predict= loaded_model.predict(X) 84 | print(y_predict) 85 | 86 | 87 | # Convert the dictionary into DataFrame 88 | df = pd.DataFrame(data) 89 | 90 | # Using DataFrame.insert() to add a column 91 | df.insert(15, "predict", y_predict, True) 92 | 93 | # Observe the result 94 | 95 | export_csv = df.to_csv ('./vgg_predict.csv', index = None, header=False) 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /models/logical_regression/lrmodel.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | 12 | data = pd.read_csv("./feature_17_all.csv") 13 | data = data.dropna() 14 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 15 | X = data[feature_cols] 16 | 17 | scaler = StandardScaler() 18 | X = scaler.fit_transform(X)# Features 19 | 20 | y = data.label # Target variable 21 | 22 | # from sklearn.model_selection import train_test_split 23 | # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 24 | 25 | from sklearn.linear_model import LogisticRegression 26 | from sklearn import metrics 27 | 28 | # instantiate the model (using the default parameters) 29 | model = LogisticRegression(class_weight = 'balanced') 30 | X_train = X 
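# Added note (not in the original script): this script trains on the 17
# hand-crafted contour features from feature_17_all.csv and evaluates on
# feature_810_all.csv. A fresh StandardScaler is fit_transform'ed on the test
# set further down; reusing the training scaler with transform() only is the
# more standard choice.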
31 | y_train = y 32 | # fit the model with data 33 | model.fit(X_train, y_train) 34 | 35 | 36 | 37 | datatest = pd.read_csv("./feature_810_all.csv") 38 | datatest = datatest.dropna() 39 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 40 | Xtest = datatest[feature_cols] 41 | scaler = StandardScaler() 42 | Xtest = scaler.fit_transform(Xtest)# Features 43 | ytest = datatest.label # Target variable 44 | 45 | 46 | y_predict= model.predict(Xtest) 47 | y_pro = model.predict_proba(Xtest)[:,1] 48 | print(y_pro) 49 | 50 | 51 | df = pd.DataFrame(datatest) 52 | df.insert(23, "lr_class", y_predict, True) 53 | df.insert(24, "lr_pro", y_pro , True) 54 | export_csv = df.to_csv ('./lrmodel.csv', index = None) 55 | print(confusion_matrix(ytest, y_predict)) 56 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 57 | print(tn,fp,fn,tp) 58 | with open('./result.csv', 'a') as csvfile: 59 | writer = csv.writer(csvfile) 60 | writer.writerow([tn,fp,fn,tp]) 61 | csvfile.close() 62 | 63 | 64 | -------------------------------------------------------------------------------- /models/logical_regression/vgg.py: -------------------------------------------------------------------------------- 1 | from keras.models import Sequential 2 | from keras.layers import Dense 3 | import numpy 4 | import os 5 | # fix random seed for reproducibility 6 | seed = 7 7 | numpy.random.seed(seed) 8 | # load pima indians dataset 9 | dataset = numpy.loadtxt('./lr_train.csv', delimiter=",") 10 | # split into input (X) and output (Y) variables 11 | X = dataset[:,2:13] 12 | Y = dataset[:,14] 13 | # create model 14 | model = Sequential() 15 | model.add(Dense(12, input_dim=11, init='uniform', activation='relu')) 16 | model.add(Dense(8, init='uniform', activation='relu')) 17 | model.add(Dense(1, init='uniform', activation='sigmoid')) 18 | # Compile model 19 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) # Fit the model 20 | model.fit(X, Y, nb_epoch=5, batch_size=128) 21 | datasettest = numpy.loadtxt('./vggtest.csv', delimiter=",") 22 | # split into input (X) and output (Y) variables 23 | Xtest = datasettest[:,2:13] 24 | Ytest = datasettest[:,14] 25 | 26 | 27 | # evaluate the model 28 | scores = model.evaluate(Xtest, Ytest) 29 | print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100)) 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /models/logical_regression/vgg_physical_integration.py: -------------------------------------------------------------------------------- 1 | import csv 2 | physical_feature_path = './location17/contour_all.csv' 3 | vgg_predict_path = './location17/vgg_predict.csv' 4 | lr_path = './location17/lr.csv' 5 | 6 | with open(lr_path, 'a') as csvfile: 7 | myFields = ['id', 'location', 'image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label',] 8 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 9 | writer.writeheader() 10 | csvfile.close() 11 | 12 | with open(physical_feature_path, newline='') as phyfile: 13 | contour = {} 14 | reader = csv.DictReader(phyfile) 15 | for phy in reader: 16 | contour = phy 17 | 18 | with open(vgg_predict_path, newline='') as vggfile: 19 | reader = csv.DictReader(vggfile) 20 | for vgg in reader: 21 | if (vgg['id'] ==contour['id']): 
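# Added note (not in the original script): this inner loop joins the physical
# contour features with the VGG prediction that has the same contour id; as
# with roof_contour_match.py, loading vgg_predict.csv once into a dict keyed by
# id would avoid re-scanning it for every contour. In vgg.py above, the Keras
# arguments init= and nb_epoch= are the Keras 1.x names; current Keras expects
# kernel_initializer= and epochs=.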
22 | contour['prediction'] = vgg['prediction'] 23 | contour['prediction_class'] = vgg['prediction_class'] 24 | vggfile.close() 25 | with open(lr_path, 'a') as lrfile: 26 | writer = csv.writer(lrfile) 27 | writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['prediction'],contour['prediction_class'],contour['label']]) 28 | lrfile.close() 29 | phyfile.close() 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /models/random_forest/random_forest.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.preprocessing import StandardScaler 5 | %matplotlib inline 6 | 7 | col_names = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label'] 8 | # load dataset 9 | data = pd.read_csv(".vgg_predict.csv", header=None, names=col_names) 10 | data = data.dropna() 11 | # feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] 12 | feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 13 | # feature_cols = ['pole','prediction'] 14 | X = data[feature_cols] 15 | 16 | scaler = StandardScaler() 17 | X = scaler.fit_transform(X)# Features 18 | 19 | y = data.label # Target variable 20 | 21 | X_train = X 22 | y_train = y 23 | from sklearn.ensemble import RandomForestClassifier 24 | clf = RandomForestClassifier(n_estimators=100, max_depth=2,random_state=0,class_weight='balanced') 25 | 26 | model = clf.fit(X_train, y_train) 27 | # # y_pred = svclassifier.predict(X_test) 28 | # # from sklearn.metrics import classification_report, confusion_matrix 29 | # # print(confusion_matrix(y_test,y_pred)) 30 | # # print(classification_report(y_test,y_pred)) 31 | 32 | col_names = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label','lrpredict','svmpredict'] 33 | # load dataset 34 | data = pd.read_csv("./vgg_predict.csv", header=None, names=col_names) 35 | 36 | data = data.dropna() 37 | # # feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] 38 | feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 39 | 40 | X1 = data[feature_cols] 41 | 42 | scaler = StandardScaler() 43 | X1 = scaler.fit_transform(X1)# Features 44 | 45 | y1 = data.label # Target variable 46 | y_pred1 = model.predict(X1) 47 | 48 | 49 | from sklearn.metrics import classification_report, confusion_matrix 50 | from sklearn.metrics import accuracy_score 51 | from sklearn.metrics import cohen_kappa_score 52 | from sklearn import metrics 53 | from sklearn.metrics import precision_recall_curve 54 | from sklearn.metrics import average_precision_score 55 | from sklearn.metrics import matthews_corrcoef 56 | from sklearn.metrics import roc_auc_score 57 | from sklearn.metrics import balanced_accuracy_score 58 | print(confusion_matrix(y1,y_pred1)) 59 | print(classification_report(y1,y_pred1)) 60 | 
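# Added note (not in the original script): "%matplotlib inline" at the top of
# this file is an IPython/Jupyter magic and is a SyntaxError when the script is
# run with a plain Python interpreter, and the read path ".vgg_predict.csv"
# looks like a truncated "./vgg_predict.csv". Neither affects the metric block
# below, which reports accuracy, balanced accuracy, precision, recall, F1, MCC
# and ROC-AUC for the random-forest predictions.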
print(accuracy_score(y1,y_pred1)) 61 | print(balanced_accuracy_score(y1,y_pred1)) 62 | print(metrics.precision_score(y1,y_pred1)) 63 | print(metrics.recall_score(y1,y_pred1)) 64 | print(metrics.f1_score(y1,y_pred1)) 65 | print(matthews_corrcoef(y1,y_pred1)) 66 | print(roc_auc_score(y1,y_pred1)) -------------------------------------------------------------------------------- /models/svm/svm10.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./feature_17_all.csv") 17 | data = data.dropna() 18 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | # from sklearn.model_selection import train_test_split 27 | # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 28 | X_train = X 29 | y_train = y 30 | 31 | from sklearn.svm import SVC 32 | svclassifier = SVC(kernel='poly',class_weight='balanced', degree=10, random_state=0,probability=True) 33 | model = svclassifier.fit(X_train, y_train) 34 | 35 | 36 | from sklearn import metrics 37 | 38 | # instantiate the model (using the default parameters) 39 | 40 | 41 | # fit the model with data 42 | model.fit(X_train, y_train) 43 | 44 | 45 | 46 | datatest = pd.read_csv("./feature_810_all.csv") 47 | datatest = datatest.dropna() 48 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 49 | Xtest = datatest[feature_cols] 50 | scaler = StandardScaler() 51 | Xtest = scaler.fit_transform(Xtest)# Features 52 | ytest = datatest.label # Target variable 53 | 54 | y_predict= model.predict(Xtest) 55 | y_pro = model.predict_proba(Xtest)[:,1] 56 | 57 | 58 | 59 | df = pd.DataFrame(datatest) 60 | df.insert(23, "svm_poly10_class", y_predict, True) 61 | df.insert(24, "svm_poly10_pro", y_predict, True) 62 | export_csv = df.to_csv ('./svm_poly10.csv', index = None) 63 | print(confusion_matrix(ytest, y_predict)) 64 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 65 | print(tn,fp,fn,tp) 66 | with open('./result.csv', 'a') as csvfile: 67 | writer = csv.writer(csvfile) 68 | writer.writerow(['svm_poly10',tn,fp,fn,tp]) 69 | csvfile.close() 70 | time = time.time() - start_time 71 | with open('./time.csv', 'a') as csvfile: 72 | writer = csv.writer(csvfile) 73 | writer.writerow(['svm_poly10',time]) 74 | csvfile.close() 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /models/svm/svm2.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing 
import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import pandas 11 | import pandas as pd 12 | import pickle 13 | from sklearn.linear_model import LogisticRegression 14 | from sklearn import metrics 15 | from sklearn import datasets 16 | from sklearn.preprocessing import StandardScaler 17 | import numpy as np 18 | from sklearn.metrics import classification_report, confusion_matrix 19 | import csv 20 | import time 21 | start_time = time.time() 22 | data = pd.read_csv("./feature_train_all.csv") 23 | data = data.dropna() 24 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 25 | X = data[feature_cols] 26 | 27 | scaler = StandardScaler() 28 | X = scaler.fit_transform(X)# Features 29 | 30 | y = data.label # Target variable 31 | 32 | X_train = X 33 | y_train = y 34 | 35 | 36 | from sklearn.svm import SVC 37 | svclassifier = SVC(kernel='poly',class_weight='balanced', degree=2, random_state=0) 38 | model = svclassifier.fit(X_train, y_train) 39 | 40 | 41 | # fit the model with data 42 | model.fit(X_train, y_train) 43 | 44 | 45 | 46 | datatest = pd.read_csv("./feature_test_all.csv") 47 | datatest = datatest.dropna() 48 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 49 | Xtest = datatest[feature_cols] 50 | scaler = StandardScaler() 51 | Xtest = scaler.fit_transform(Xtest)# Features 52 | ytest = datatest.label # Target variable 53 | 54 | 55 | y_predict= model.predict(Xtest) 56 | # y_pro = model.predict_proba(Xtest)[:,1] 57 | 58 | 59 | df = pd.DataFrame(datatest) 60 | df.insert(22, "svm2_class", y_predict, True) 61 | # df.insert(23, "svm6_pro", y_pro , True) 62 | export_csv = df.to_csv ('./svm2.csv', index = None) 63 | print(confusion_matrix(ytest, y_predict)) 64 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 65 | print(tn,fp,fn,tp) 66 | with open('./result.csv', 'a') as csvfile: 67 | writer = csv.writer(csvfile) 68 | writer.writerow(['svm2',tn,fp,fn,tp]) 69 | csvfile.close() 70 | 71 | time = time.time() - start_time 72 | with open('./time.csv', 'a') as csvfile: 73 | writer = csv.writer(csvfile) 74 | writer.writerow(['svm2',time]) 75 | csvfile.close() 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /models/svm/svm3.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./feature_17_all.csv") 17 | data = data.dropna() 18 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | # from sklearn.model_selection import 
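# Added note (not in the original script): svm2.py through svm10.py are the
# same pipeline with only the polynomial degree of the SVC changed. Two
# apparent copy-paste issues in this family: (1) svm3.py and svm7.py build the
# SVC without probability=True yet later call predict_proba, which fails in
# scikit-learn (AttributeError in recent releases); (2) several scripts insert
# the "_pro" column with y_predict instead of the computed y_pro, and svm3.py
# names its output columns "svm_linear_*" although the kernel is poly with
# degree=3.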
train_test_split 27 | # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 28 | X_train = X 29 | y_train = y 30 | 31 | from sklearn.svm import SVC 32 | svclassifier = SVC(kernel='poly',class_weight='balanced', degree=3, random_state=0) 33 | model = svclassifier.fit(X_train, y_train) 34 | 35 | 36 | from sklearn import metrics 37 | 38 | # instantiate the model (using the default parameters) 39 | 40 | 41 | # fit the model with data 42 | model.fit(X_train, y_train) 43 | 44 | 45 | 46 | datatest = pd.read_csv("./feature_810_all.csv") 47 | datatest = datatest.dropna() 48 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 49 | Xtest = datatest[feature_cols] 50 | scaler = StandardScaler() 51 | Xtest = scaler.fit_transform(Xtest)# Features 52 | ytest = datatest.label # Target variable 53 | 54 | y_predict= model.predict(Xtest) 55 | y_pro = model.predict_proba(Xtest)[:,1] 56 | 57 | 58 | 59 | df = pd.DataFrame(datatest) 60 | df.insert(23, "svm_linear_class", y_predict, True) 61 | df.insert(24, "svm_linear_pro", y_predict, True) 62 | export_csv = df.to_csv ('./output/svm_poly3.csv', index = None) 63 | print(confusion_matrix(ytest, y_predict)) 64 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 65 | print(tn,fp,fn,tp) 66 | with open('./result.csv', 'a') as csvfile: 67 | writer = csv.writer(csvfile) 68 | writer.writerow(['svm_poly3',tn,fp,fn,tp]) 69 | csvfile.close() 70 | time = time.time() - start_time 71 | with open('./time.csv', 'a') as csvfile: 72 | writer = csv.writer(csvfile) 73 | writer.writerow(['svm_poly3',time]) 74 | csvfile.close() 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /models/svm/svm4.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./feature_17_all.csv") 17 | data = data.dropna() 18 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | # from sklearn.model_selection import train_test_split 27 | # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 28 | X_train = X 29 | y_train = y 30 | 31 | from sklearn.svm import SVC 32 | svclassifier = SVC(kernel='poly',class_weight='balanced', degree=4, random_state=0,probability=True) 33 | model = svclassifier.fit(X_train, y_train) 34 | 35 | 36 | from sklearn import metrics 37 | 38 | # instantiate the model (using the default parameters) 39 | 40 | 41 | # fit the model with data 42 | model.fit(X_train, y_train) 43 | 44 | 45 | 46 | datatest = pd.read_csv("./feature_810_all.csv") 47 | datatest = datatest.dropna() 48 | feature_cols = 
['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 49 | Xtest = datatest[feature_cols] 50 | scaler = StandardScaler() 51 | Xtest = scaler.fit_transform(Xtest)# Features 52 | ytest = datatest.label # Target variable 53 | 54 | y_predict= model.predict(Xtest) 55 | y_pro = model.predict_proba(Xtest)[:,1] 56 | 57 | 58 | 59 | df = pd.DataFrame(datatest) 60 | df.insert(23, "svm_poly4_class", y_predict, True) 61 | df.insert(24, "svm_poly4_pro", y_predict, True) 62 | export_csv = df.to_csv ('./svm_poly4.csv', index = None) 63 | print(confusion_matrix(ytest, y_predict)) 64 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 65 | print(tn,fp,fn,tp) 66 | with open('./result.csv', 'a') as csvfile: 67 | writer = csv.writer(csvfile) 68 | writer.writerow(['svm_poly4',tn,fp,fn,tp]) 69 | csvfile.close() 70 | time = time.time() - start_time 71 | with open('./time.csv', 'a') as csvfile: 72 | writer = csv.writer(csvfile) 73 | writer.writerow(['svm_poly4',time]) 74 | csvfile.close() 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /models/svm/svm5.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./feature_17_all.csv") 17 | data = data.dropna() 18 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | X_train = X 27 | y_train = y 28 | 29 | from sklearn.svm import SVC 30 | svclassifier = SVC(kernel='poly',class_weight='balanced', degree=5, random_state=0,probability=True) 31 | model = svclassifier.fit(X_train, y_train) 32 | 33 | 34 | from sklearn import metrics 35 | 36 | # instantiate the model (using the default parameters) 37 | 38 | 39 | # fit the model with data 40 | model.fit(X_train, y_train) 41 | 42 | 43 | 44 | datatest = pd.read_csv("./feature_810_all.csv") 45 | datatest = datatest.dropna() 46 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 47 | Xtest = datatest[feature_cols] 48 | scaler = StandardScaler() 49 | Xtest = scaler.fit_transform(Xtest)# Features 50 | ytest = datatest.label # Target variable 51 | 52 | y_predict= model.predict(Xtest) 53 | y_pro = model.predict_proba(Xtest)[:,1] 54 | 55 | 56 | 57 | df = pd.DataFrame(datatest) 58 | df.insert(23, "svm_poly5_class", y_predict, True) 59 | df.insert(24, "svm_poly5_pro", y_predict, True) 60 | export_csv = df.to_csv ('./svm_poly5.csv', index = None) 61 | print(confusion_matrix(ytest, y_predict)) 62 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 63 | print(tn,fp,fn,tp) 64 | with open('./result.csv', 'a') as csvfile: 65 | writer = 
csv.writer(csvfile) 66 | writer.writerow(['svm_poly5',tn,fp,fn,tp]) 67 | csvfile.close() 68 | time = time.time() - start_time 69 | with open('./time.csv', 'a') as csvfile: 70 | writer = csv.writer(csvfile) 71 | writer.writerow(['svm_poly5',time]) 72 | csvfile.close() 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /models/svm/svm6.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./feature_17_all.csv") 17 | data = data.dropna() 18 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | # from sklearn.model_selection import train_test_split 27 | # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 28 | X_train = X 29 | y_train = y 30 | 31 | from sklearn.svm import SVC 32 | svclassifier = SVC(kernel='poly',class_weight='balanced', degree=6, random_state=0,probability=True) 33 | model = svclassifier.fit(X_train, y_train) 34 | 35 | 36 | from sklearn import metrics 37 | 38 | # instantiate the model (using the default parameters) 39 | 40 | 41 | # fit the model with data 42 | model.fit(X_train, y_train) 43 | 44 | 45 | 46 | datatest = pd.read_csv("./feature_810_all.csv") 47 | datatest = datatest.dropna() 48 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 49 | Xtest = datatest[feature_cols] 50 | scaler = StandardScaler() 51 | Xtest = scaler.fit_transform(Xtest)# Features 52 | ytest = datatest.label # Target variable 53 | 54 | y_predict= model.predict(Xtest) 55 | y_pro = model.predict_proba(Xtest)[:,1] 56 | 57 | 58 | 59 | df = pd.DataFrame(datatest) 60 | df.insert(23, "svm_poly6_class", y_predict, True) 61 | df.insert(24, "svm_poly6_pro", y_predict, True) 62 | export_csv = df.to_csv ('./svm_poly6.csv', index = None) 63 | print(confusion_matrix(ytest, y_predict)) 64 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 65 | print(tn,fp,fn,tp) 66 | with open('./result.csv', 'a') as csvfile: 67 | writer = csv.writer(csvfile) 68 | writer.writerow(['svm_poly6',tn,fp,fn,tp]) 69 | csvfile.close() 70 | time = time.time() - start_time 71 | with open('./time.csv', 'a') as csvfile: 72 | writer = csv.writer(csvfile) 73 | writer.writerow(['svm_poly6',time]) 74 | csvfile.close() 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /models/svm/svm7.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from 
sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./feature_17_all.csv") 17 | data = data.dropna() 18 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | # from sklearn.model_selection import train_test_split 27 | # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 28 | X_train = X 29 | y_train = y 30 | 31 | from sklearn.svm import SVC 32 | svclassifier = SVC(kernel='poly',class_weight='balanced', degree=7, random_state=0) 33 | model = svclassifier.fit(X_train, y_train) 34 | 35 | 36 | from sklearn import metrics 37 | 38 | # instantiate the model (using the default parameters) 39 | 40 | 41 | # fit the model with data 42 | model.fit(X_train, y_train) 43 | 44 | 45 | 46 | datatest = pd.read_csv("./feature_810_all.csv") 47 | datatest = datatest.dropna() 48 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 49 | Xtest = datatest[feature_cols] 50 | scaler = StandardScaler() 51 | Xtest = scaler.fit_transform(Xtest)# Features 52 | ytest = datatest.label # Target variable 53 | 54 | y_predict= model.predict(Xtest) 55 | y_pro = model.predict_proba(Xtest)[:,1] 56 | 57 | 58 | 59 | df = pd.DataFrame(datatest) 60 | df.insert(23, "svm_poly7_class", y_predict, True) 61 | df.insert(24, "svm_poly7_pro", y_predict, True) 62 | export_csv = df.to_csv ('./svm_poly7.csv', index = None) 63 | print(confusion_matrix(ytest, y_predict)) 64 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 65 | print(tn,fp,fn,tp) 66 | with open('./result.csv', 'a') as csvfile: 67 | writer = csv.writer(csvfile) 68 | writer.writerow(['svm_poly7',tn,fp,fn,tp]) 69 | csvfile.close() 70 | time = time.time() - start_time 71 | with open('./time.csv', 'a') as csvfile: 72 | writer = csv.writer(csvfile) 73 | writer.writerow(['svm_poly7',time]) 74 | csvfile.close() 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /models/svm/svm8.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./feature_17_all.csv") 17 | data = data.dropna() 18 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | # from sklearn.model_selection import train_test_split 27 | # 
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 28 | X_train = X 29 | y_train = y 30 | 31 | from sklearn.svm import SVC 32 | svclassifier = SVC(kernel='poly',class_weight='balanced', degree=8, random_state=0,probability=True) 33 | model = svclassifier.fit(X_train, y_train) 34 | 35 | 36 | from sklearn import metrics 37 | 38 | # instantiate the model (using the default parameters) 39 | 40 | 41 | # fit the model with data 42 | model.fit(X_train, y_train) 43 | 44 | 45 | 46 | datatest = pd.read_csv("./feature_810_all.csv") 47 | datatest = datatest.dropna() 48 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 49 | Xtest = datatest[feature_cols] 50 | scaler = StandardScaler() 51 | Xtest = scaler.fit_transform(Xtest)# Features 52 | ytest = datatest.label # Target variable 53 | 54 | y_predict= model.predict(Xtest) 55 | y_pro = model.predict_proba(Xtest)[:,1] 56 | 57 | 58 | 59 | df = pd.DataFrame(datatest) 60 | df.insert(23, "svm_poly8_class", y_predict, True) 61 | df.insert(24, "svm_poly8_pro", y_predict, True) 62 | export_csv = df.to_csv ('./svm_poly8.csv', index = None) 63 | print(confusion_matrix(ytest, y_predict)) 64 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 65 | print(tn,fp,fn,tp) 66 | with open('./result.csv', 'a') as csvfile: 67 | writer = csv.writer(csvfile) 68 | writer.writerow(['svm_poly8',tn,fp,fn,tp]) 69 | csvfile.close() 70 | time = time.time() - start_time 71 | with open('./time.csv', 'a') as csvfile: 72 | writer = csv.writer(csvfile) 73 | writer.writerow(['svm_poly8',time]) 74 | csvfile.close() 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /models/svm/svm9.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./feature_17_all.csv") 17 | data = data.dropna() 18 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | # from sklearn.model_selection import train_test_split 27 | # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 28 | X_train = X 29 | y_train = y 30 | 31 | from sklearn.svm import SVC 32 | svclassifier = SVC(kernel='poly',class_weight='balanced', degree=9, random_state=0,probability=True) 33 | model = svclassifier.fit(X_train, y_train) 34 | 35 | 36 | from sklearn import metrics 37 | 38 | # instantiate the model (using the default parameters) 39 | 40 | 41 | # fit the model with data 42 | model.fit(X_train, y_train) 43 | 44 | 45 | 46 | datatest = pd.read_csv("./feature_810_all.csv") 47 | datatest = datatest.dropna() 48 | feature_cols = 
['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 49 | Xtest = datatest[feature_cols] 50 | scaler = StandardScaler() 51 | Xtest = scaler.fit_transform(Xtest)# Features 52 | ytest = datatest.label # Target variable 53 | 54 | y_predict= model.predict(Xtest) 55 | y_pro = model.predict_proba(Xtest)[:,1] 56 | 57 | 58 | 59 | df = pd.DataFrame(datatest) 60 | df.insert(23, "svm_poly9_class", y_predict, True) 61 | df.insert(24, "svm_poly9_pro", y_predict, True) 62 | export_csv = df.to_csv ('./svm_poly9.csv', index = None) 63 | print(confusion_matrix(ytest, y_predict)) 64 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 65 | print(tn,fp,fn,tp) 66 | with open('./result.csv', 'a') as csvfile: 67 | writer = csv.writer(csvfile) 68 | writer.writerow(['svm_poly9',tn,fp,fn,tp]) 69 | csvfile.close() 70 | time = time.time() - start_time 71 | with open('./time.csv', 'a') as csvfile: 72 | writer = csv.writer(csvfile) 73 | writer.writerow(['svm_poly9',time]) 74 | csvfile.close() 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /models/svm/svm_roc.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | %matplotlib inline 14 | 15 | 16 | 17 | data = pd.read_csv("./svmrbftrainprobility.csv") 18 | data = data.dropna() 19 | # feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro'] 20 | feature_cols = ['vgg_pro','svmrbfpro'] 21 | X = data[feature_cols] 22 | 23 | scaler = StandardScaler() 24 | X = scaler.fit_transform(X)# Features 25 | 26 | y = data.label # Target variable 27 | 28 | 29 | X_train = X 30 | y_train = y 31 | 32 | 33 | 34 | 35 | # use linear regression 36 | from sklearn.linear_model import LogisticRegression 37 | model = LogisticRegression(class_weight = 'balanced') 38 | 39 | # instantiate the model (using the default parameters) 40 | 41 | # fit the model with data 42 | model.fit(X_train, y_train) 43 | # from sklearn.externals import joblib 44 | # from joblib import dump, load 45 | # dump(model, 'svmrbfhybrid.joblib') 46 | # model = load('svmrbfhybrid.joblib') 47 | print(model.coef_ ) 48 | print(model.intercept_ ) 49 | from sklearn import metrics 50 | 51 | 52 | 53 | 54 | datatest = pd.read_csv("./svmrbftestpro.csv") 55 | datatest = datatest.dropna() 56 | # feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro'] 57 | feature_cols = ['vgg_pro','svmrbfpro'] 58 | Xtest = datatest[feature_cols] 59 | scaler = StandardScaler() 60 | Xtest = scaler.fit_transform(Xtest)# Features 61 | ytest = datatest.label # Target variable 62 | y_predict_vgg = datatest.vgg_pro 63 | y_predict_svm = datatest.svmrbfpro 64 | 65 | 66 | 67 | y_predict= model.predict(Xtest) 68 | y_predict_pro = model.predict_proba(Xtest) 69 | y_predict_pro = y_predict_pro[:, 1] 70 | 71 | 72 | 73 | df = pd.DataFrame(datatest) 74 | df.insert(25, "svm_nosplit_pro", y_predict_pro, True) 75 | df.insert(26, "svm_nosplit_class", y_predict, True) 76 | 77 | export_csv = df.to_csv 
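# Added note (not in the original script): svm_roc.py stacks only the two
# probability outputs (vgg_pro, svmrbfpro) with a balanced LogisticRegression,
# writes the stacked class and probability into new CSV columns above, and the
# plotting block further down draws ROC curves for the stacker, the VGG model
# alone, and the SVM-RBF model alone. roc_curve needs continuous scores, which
# is why y_predict_pro and the raw per-model probabilities are passed rather
# than hard 0/1 labels.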
('./vggsvmlogicalregression2features.csv', index = None) 78 | print(confusion_matrix(ytest, y_predict)) 79 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 80 | print(tn,fp,fn,tp) 81 | with open('./result.csv', 'a') as csvfile: 82 | writer = csv.writer(csvfile) 83 | writer.writerow(['vggsvmlogicalregression2features.csv',tn,fp,fn,tp]) 84 | csvfile.close() 85 | time = time.time() - start_time 86 | with open('./time.csv', 'a') as csvfile: 87 | writer = csv.writer(csvfile) 88 | writer.writerow(['vggsvmlogicalregression2features.csv',time]) 89 | csvfile.close() 90 | 91 | 92 | 93 | from sklearn.metrics import classification_report, confusion_matrix 94 | from sklearn.metrics import accuracy_score 95 | from sklearn.metrics import cohen_kappa_score 96 | from sklearn import metrics 97 | from sklearn.metrics import precision_recall_curve 98 | from sklearn.metrics import average_precision_score 99 | from sklearn.metrics import matthews_corrcoef 100 | from sklearn.metrics import roc_auc_score 101 | from sklearn.metrics import balanced_accuracy_score 102 | from sklearn.metrics import roc_curve 103 | from matplotlib import pyplot 104 | print(confusion_matrix(ytest, y_predict)) 105 | print(classification_report(ytest, y_predict)) 106 | print(accuracy_score(ytest, y_predict)) 107 | print(balanced_accuracy_score(ytest, y_predict)) 108 | print(metrics.precision_score(ytest, y_predict)) 109 | print(metrics.recall_score(ytest, y_predict)) 110 | print(metrics.f1_score(ytest, y_predict)) 111 | print(matthews_corrcoef(ytest, y_predict)) 112 | print(roc_auc_score(ytest, y_predict)) 113 | print(roc_auc_score(ytest, y_predict_vgg )) 114 | print(roc_auc_score(ytest, y_predict)) 115 | lr_fpr, lr_tpr, _ = roc_curve(ytest, y_predict_pro) 116 | lr_fpr_vgg, lr_tpr_vgg, _ = roc_curve(ytest, y_predict_vgg ) 117 | lr_fpr_svm, lr_tpr_svm, _ = roc_curve(ytest, y_predict_svm) 118 | pyplot.plot(lr_fpr, lr_tpr, marker='x', label='Logistic') 119 | pyplot.plot(lr_fpr_vgg, lr_tpr_vgg, marker='o', label='vgg') 120 | pyplot.plot(lr_fpr_svm, lr_tpr_svm, marker='v', label='svm kernel=rbf') 121 | pyplot.xlabel('False Positive Rate',{'size': 14}) 122 | pyplot.ylabel('True Positive Rate',{'size': 14}) 123 | # show the legend 124 | pyplot.legend() 125 | pyplot.tight_layout() 126 | pyplot.savefig('./split_roc.png') 127 | # show the plot 128 | pyplot.show() 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /models/svm/svmaggressive.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./feature_17_all.csv") 17 | data = data.dropna() 18 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | # from sklearn.model_selection import train_test_split 27 | # 
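# Added note (not in the original script): PassiveAggressiveClassifier is a
# margin-based linear model without predict_proba, so the
# model.predict_proba(Xtest) call below raises AttributeError; its
# decision_function scores (or a genuinely probabilistic model) would be the
# usual substitute if a probability column is really needed.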
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 28 | X_train = X 29 | y_train = y 30 | 31 | from sklearn.linear_model import PassiveAggressiveClassifier 32 | svclassifier = PassiveAggressiveClassifier(max_iter=1000, random_state=0,tol=1e-3,class_weight='balanced') 33 | model = svclassifier.fit(X_train, y_train) 34 | 35 | 36 | from sklearn import metrics 37 | 38 | # instantiate the model (using the default parameters) 39 | 40 | 41 | # fit the model with data 42 | model.fit(X_train, y_train) 43 | 44 | 45 | 46 | datatest = pd.read_csv("./feature_810_all.csv") 47 | datatest = datatest.dropna() 48 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 49 | Xtest = datatest[feature_cols] 50 | scaler = StandardScaler() 51 | Xtest = scaler.fit_transform(Xtest)# Features 52 | ytest = datatest.label # Target variable 53 | 54 | y_predict= model.predict(Xtest) 55 | y_pro = model.predict_proba(Xtest)[:,1] 56 | 57 | 58 | 59 | df = pd.DataFrame(datatest) 60 | df.insert(23, "PassiveAggressive", y_predict, True) 61 | df.insert(24, "PassiveAggressive", y_predict, True) 62 | export_csv = df.to_csv ('./PassiveAggressive.csv', index = None) 63 | print(confusion_matrix(ytest, y_predict)) 64 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 65 | print(tn,fp,fn,tp) 66 | with open('./split/result.csv', 'a') as csvfile: 67 | writer = csv.writer(csvfile) 68 | writer.writerow(['PassiveAggressive',tn,fp,fn,tp]) 69 | csvfile.close() 70 | time = time.time() - start_time 71 | with open('./split/time.csv', 'a') as csvfile: 72 | writer = csv.writer(csvfile) 73 | writer.writerow(['PassiveAggressive',time]) 74 | csvfile.close() 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /models/svm/svmlinear.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | 12 | data = pd.read_csv("./feature_17_all.csv") 13 | data = data.dropna() 14 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 15 | X = data[feature_cols] 16 | 17 | scaler = StandardScaler() 18 | X = scaler.fit_transform(X)# Features 19 | 20 | y = data.label # Target variable 21 | 22 | # from sklearn.model_selection import train_test_split 23 | # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 24 | X_train = X 25 | y_train = y 26 | 27 | from sklearn.svm import SVC 28 | svclassifier = SVC(kernel='linear',class_weight='balanced',probability=True) 29 | model = svclassifier.fit(X_train, y_train) 30 | 31 | 32 | from sklearn import metrics 33 | 34 | # instantiate the model (using the default parameters) 35 | 36 | 37 | # fit the model with data 38 | model.fit(X_train, y_train) 39 | 40 | 41 | 42 | datatest = pd.read_csv("./feature_810_all.csv") 43 | datatest = datatest.dropna() 44 | feature_cols = 
['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 45 | Xtest = datatest[feature_cols] 46 | scaler = StandardScaler() 47 | Xtest = scaler.fit_transform(Xtest)# Features 48 | ytest = datatest.label # Target variable 49 | 50 | y_predict= model.predict(Xtest) 51 | y_pro = model.predict_proba(Xtest)[:,1] 52 | 53 | 54 | 55 | df = pd.DataFrame(datatest) 56 | df.insert(23, "svm_linear_class", y_predict, True) 57 | df.insert(24, "svm_linear_pro", y_predict, True) 58 | export_csv = df.to_csv ('./output/svm_linear.csv', index = None) 59 | print(confusion_matrix(ytest, y_predict)) 60 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 61 | print(tn,fp,fn,tp) 62 | with open('./result.csv', 'a') as csvfile: 63 | writer = csv.writer(csvfile) 64 | writer.writerow(['svm_linear',tn,fp,fn,tp]) 65 | csvfile.close() 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /models/svm/svmnosplit.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.preprocessing import StandardScaler 5 | %matplotlib inline 6 | 7 | data = pd.read_csv("./vgg_predict.csv") 8 | data = data.dropna() 9 | 10 | 11 | df = pd.DataFrame(data) 12 | y1 = df.iloc[:,14].astype(int) 13 | print(y1) 14 | y_pred1 = df.iloc[:,16].astype(int) 15 | 16 | 17 | from sklearn.metrics import classification_report, confusion_matrix 18 | from sklearn.metrics import accuracy_score 19 | from sklearn.metrics import cohen_kappa_score 20 | from sklearn import metrics 21 | from sklearn.metrics import precision_recall_curve 22 | from sklearn.metrics import average_precision_score 23 | from sklearn.metrics import matthews_corrcoef 24 | from sklearn.metrics import roc_auc_score 25 | from sklearn.metrics import balanced_accuracy_score 26 | print(confusion_matrix(y1,y_pred1)) 27 | print(classification_report(y1,y_pred1)) 28 | print(accuracy_score(y1,y_pred1)) 29 | print(balanced_accuracy_score(y1,y_pred1)) 30 | print(metrics.precision_score(y1,y_pred1)) 31 | print(metrics.recall_score(y1,y_pred1)) 32 | print(metrics.f1_score(y1,y_pred1)) 33 | print(matthews_corrcoef(y1,y_pred1)) 34 | print(roc_auc_score(y1,y_pred1)) -------------------------------------------------------------------------------- /models/svm/svmrbf.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./feature_17_all.csv") 17 | data = data.dropna() 18 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | 27 | X_train = X 28 | y_train = y 29 | 30 | from sklearn.svm import SVC 31 | svclassifier = SVC(kernel='rbf',class_weight='balanced') 32 
| model = svclassifier.fit(X_train, y_train) 33 | 34 | 35 | 36 | 37 | # fit the model with data 38 | model.fit(X_train, y_train) 39 | 40 | 41 | 42 | datatest = pd.read_csv("./feature_810_all.csv") 43 | datatest = datatest.dropna() 44 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 45 | Xtest = datatest[feature_cols] 46 | scaler = StandardScaler() 47 | Xtest = scaler.fit_transform(Xtest)# Features 48 | ytest = datatest.label # Target variable 49 | 50 | y_predict= model.predict(Xtest) 51 | y_pro = model.decision_function(Xtest) # the SVC above is built without probability=True, so use the decision score rather than predict_proba 52 | 53 | 54 | 55 | df = pd.DataFrame(datatest) 56 | df.insert(23, "svm_linear_class", y_predict, True) 57 | df.insert(24, "svm_linear_pro", y_pro, True) 58 | export_csv = df.to_csv ('./svm_poly3.csv', index = None) 59 | print(confusion_matrix(ytest, y_predict)) 60 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 61 | print(tn,fp,fn,tp) 62 | with open('./result.csv', 'a') as csvfile: 63 | writer = csv.writer(csvfile) 64 | writer.writerow(['svm_poly3',tn,fp,fn,tp]) 65 | csvfile.close() 66 | time = time.time() - start_time 67 | with open('./time.csv', 'a') as csvfile: 68 | writer = csv.writer(csvfile) 69 | writer.writerow(['svm_poly3',time]) 70 | csvfile.close() 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /models/svm/svmridge.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | from sklearn.preprocessing import StandardScaler 8 | import numpy as np 9 | from sklearn.metrics import classification_report, confusion_matrix 10 | import csv 11 | import time 12 | start_time = time.time() 13 | 14 | 15 | 16 | data = pd.read_csv("./feature_17_all.csv") 17 | data = data.dropna() 18 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 19 | X = data[feature_cols] 20 | 21 | scaler = StandardScaler() 22 | X = scaler.fit_transform(X)# Features 23 | 24 | y = data.label # Target variable 25 | 26 | # from sklearn.model_selection import train_test_split 27 | # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) 28 | X_train = X 29 | y_train = y 30 | 31 | 32 | from sklearn.linear_model import RidgeClassifierCV 33 | svclassifier = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1],class_weight='balanced') 34 | model = svclassifier.fit(X_train, y_train) 35 | 36 | 37 | from sklearn import metrics 38 | 39 | # instantiate the model (using the default parameters) 40 | 41 | 42 | # fit the model with data 43 | model.fit(X_train, y_train) 44 | 45 | 46 | 47 | datatest = pd.read_csv("./feature_810_all.csv") 48 | datatest = datatest.dropna() 49 | feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] 50 | Xtest = datatest[feature_cols] 51 | scaler = StandardScaler() 52 | Xtest = scaler.fit_transform(Xtest)# Features 53 | ytest = datatest.label # Target variable 54 | 55 | y_predict= model.predict(Xtest) 56 | y_pro = model.decision_function(Xtest) # RidgeClassifierCV has no predict_proba; use the decision score instead 57 | 58 |
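# Editor's note (illustrative sketch, not part of the original script): the decision score above is an
# uncalibrated margin, not a probability. If a probability-like value is needed downstream, one option
# is scikit-learn's CalibratedClassifierCV wrapped around the same ridge classifier; the lines below are
# a hypothetical usage that reuses this script's variables and is kept commented out on purpose.
# from sklearn.calibration import CalibratedClassifierCV
# calibrated = CalibratedClassifierCV(svclassifier, method='sigmoid', cv=3).fit(X_train, y_train)
# y_pro = calibrated.predict_proba(Xtest)[:, 1]  # calibrated probability of the positive (panel) class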
59 | 60 | df = pd.DataFrame(datatest) 61 | df.insert(23, "RidgeClassifier_class", y_predict, True) 62 | df.insert(24, "RidgeClassifier_pro", y_predict, True) 63 | export_csv = df.to_csv ('./RidgeClassifier.csv', index = None) 64 | print(confusion_matrix(ytest, y_predict)) 65 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() 66 | print(tn,fp,fn,tp) 67 | with open('./split/result.csv', 'a') as csvfile: 68 | writer = csv.writer(csvfile) 69 | writer.writerow(['RidgeClassifier',tn,fp,fn,tp]) 70 | csvfile.close() 71 | time = time.time() - start_time 72 | with open('./split/time.csv', 'a') as csvfile: 73 | writer = csv.writer(csvfile) 74 | writer.writerow(['RidgeClassifier',time]) 75 | csvfile.close() 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /models/svm/svmsplit.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.preprocessing import StandardScaler 5 | %matplotlib inline 6 | 7 | col_names = ['id', 'location','image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction','prediction_class','label'] 8 | # load dataset 9 | data = pd.read_csv("./lr.csv", header=None, names=col_names) 10 | data = data.dropna() 11 | # feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] 12 | feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] 13 | X = data[feature_cols] 14 | 15 | scaler = StandardScaler() 16 | X = scaler.fit_transform(X)# Features 17 | 18 | y = data.label # Target variable 19 | # from sklearn.model_selection import train_test_split 20 | # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20) 21 | X_train = X 22 | y_train = y 23 | from sklearn.svm import SVC 24 | svclassifier = SVC(kernel='poly',class_weight='balanced', degree=8, random_state=0) 25 | svclassifier.fit(X_train, y_train) 26 | # y_pred = svclassifier.predict(X_test) 27 | # from sklearn.metrics import classification_report, confusion_matrix 28 | # print(confusion_matrix(y_test,y_pred)) 29 | # print(classification_report(y_test,y_pred)) 30 | 31 | col_names = ['id', 'location','image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction','prediction_class','label','lrpredict'] 32 | # load dataset 33 | data = pd.read_csv("./location810/lr.csv", header=None, names=col_names) 34 | data = data.dropna() 35 | # feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] 36 | feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] 37 | X1 = data[feature_cols] 38 | 39 | scaler = StandardScaler() 40 | X1 = scaler.fit_transform(X1)# Features 41 | 42 | y1 = data.label # Target variable 43 | y_pred1 = svclassifier.predict(X1) 44 | 45 | from sklearn.metrics import classification_report, confusion_matrix 46 | print(confusion_matrix(y1,y_pred1)) 47 | print(classification_report(y1,y_pred1)) 48 | 49 | 50 | -------------------------------------------------------------------------------- /models/thresholding/append_new_column.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 
import pandas as pd 3 | 4 | loaded_model = pickle.load(open(filename, 'rb')) 5 | result = loaded_model.score(X, y) 6 | print(result) 7 | y_predict= loaded_model.predict(X) 8 | print("Y predict/hat ", y_predict) 9 | print(metrics.confusion_matrix(y, y_predict)) 10 | 11 | y_predict= loaded_model.predict(X) 12 | print(y_predict) 13 | 14 | 15 | # Convert the dictionary into DataFrame 16 | df = pd.DataFrame(data) 17 | 18 | # Using DataFrame.insert() to add a column 19 | df.insert(15, "predict", y_predict, True) 20 | 21 | # Observe the result 22 | 23 | export_csv = df.to_csv ('./vgg_predict.csv', index = None, header=False) -------------------------------------------------------------------------------- /models/thresholding/data_description.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | 6 | import csv 7 | 8 | # col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label'] 9 | col_names = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] 10 | 11 | data = pd.read_csv("./feature_17_all.csv", names=col_names) 12 | data = data.dropna() 13 | 14 | g_outputDir = './output/final/split/' 15 | csv_path = g_outputDir + 'feature_description.csv' 16 | 17 | positive_sample_set = data[data['label'] == 1.0] 18 | negative_sample_set = data[data['label'] == 0.0] 19 | 20 | analysis_features = ['size', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] 21 | # analysis_features = ['size'] 22 | 23 | labels = ['mean', 'std', 'min', 'max', '50%', '25%','75%'] 24 | 25 | def get_whiskers(feature_array): 26 | Q1, median, Q3 = np.percentile(np.asarray(feature_array), [25, 50, 75]) 27 | 28 | IQR = Q3 - Q1 29 | 30 | loval = Q1 - 1.5 * IQR 31 | hival = Q3 + 1.5 * IQR 32 | 33 | upper_wisk_set = np.compress(feature_array <= hival, feature_array) 34 | lower_wisk_set = np.compress(feature_array >= loval, feature_array) 35 | upper_wisk = np.max(upper_wisk_set) 36 | lower_wisk = np.min(lower_wisk_set) 37 | 38 | return [lower_wisk, upper_wisk] 39 | 40 | csv_header = ['feature', 'mean', 'std', 'min', 'max', 'median', '25%', '75%', '0.35%', '99.65%'] 41 | with open(csv_path, 'a') as csv_file: 42 | writer = csv.DictWriter(csv_file, fieldnames=csv_header) 43 | writer.writeheader() 44 | csv_file.close() 45 | 46 | output = {} 47 | 48 | for analysis_feature in analysis_features: 49 | 50 | positive_sample_set_description = positive_sample_set[analysis_feature].describe() 51 | print('positive_sample_set:') 52 | 53 | row_name = str(analysis_feature+'_pos') 54 | 55 | for l in labels: 56 | output[l] = positive_sample_set_description[l] 57 | 58 | positive_whis = get_whiskers(positive_sample_set[analysis_feature]) 59 | output['0.35%'] = positive_whis[0] 60 | output['99.65%'] = positive_whis[1] 61 | 62 | print(output) 63 | 64 | with open(csv_path, 'a') as csv_file: 65 | writer = csv.writer(csv_file) 66 | writer.writerow([row_name, output['mean'], output['std'], output['min'], output['max'], output['50%'], output['25%'], output['75%'], output['0.35%'], output['99.65%']]) 67 | csv_file.close() 68 
| 69 | 70 | negative_sample_set_description = negative_sample_set[analysis_feature].describe() 71 | print('negative_sample_set:') 72 | row_name = str(analysis_feature+'_neg') 73 | 74 | for l in labels: 75 | output[l] = negative_sample_set_description[l] 76 | 77 | negative_whis = get_whiskers(negative_sample_set[analysis_feature]) 78 | output['0.35%'] = negative_whis[0] 79 | output['99.65%'] = negative_whis[1] 80 | 81 | print(output) 82 | 83 | with open(csv_path, 'a') as csv_file: 84 | writer = csv.writer(csv_file) 85 | writer.writerow([row_name, output['mean'], output['std'], output['min'], output['max'], output['50%'], output['25%'], output['75%'], output['0.35%'], output['99.65%']]) 86 | csv_file.close() 87 | 88 | # input('Press ENTER to continue...') 89 | 90 | 91 | -------------------------------------------------------------------------------- /models/thresholding/hard_filters_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | 6 | import csv 7 | 8 | # dataset = 'split' 9 | dataset = 'non-split' 10 | 11 | # Generate hard filters 12 | 13 | if dataset == 'split': 14 | col_names = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] 15 | #split training data path 16 | training_data_csv_path = "./data/final/split/feature_17_all.csv" 17 | elif dataset == 'non-split': 18 | col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] 19 | # non-split training data path 20 | training_data_csv_path = "./data/final/non_split/feature_train_all.csv" 21 | else: 22 | print('No dataset is selected.') 23 | exit() 24 | 25 | data = pd.read_csv(training_data_csv_path, names=col_names) 26 | data = data.dropna() 27 | 28 | positive_sample_set = data[data['label'] == 1.0] 29 | negative_sample_set = data[data['label'] == 0.0] 30 | 31 | analysis_features = ['size', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] 32 | # analysis_features = ['size'] 33 | 34 | number_of_features = len(analysis_features) + 1 35 | 36 | labels = ['mean', 'std', 'min', 'max', '50%', '25%','75%'] 37 | 38 | def get_whiskers(feature_array): 39 | Q1, median, Q3 = np.percentile(np.asarray(feature_array), [25, 50, 75]) 40 | 41 | IQR = Q3 - Q1 42 | 43 | loval = Q1 - 1.5 * IQR 44 | hival = Q3 + 1.5 * IQR 45 | 46 | upper_wisk_set = np.compress(feature_array <= hival, feature_array) 47 | lower_wisk_set = np.compress(feature_array >= loval, feature_array) 48 | upper_wisk = np.max(upper_wisk_set) 49 | lower_wisk = np.min(lower_wisk_set) 50 | 51 | return [lower_wisk, upper_wisk] 52 | 53 | hard_filters = {} 54 | 55 | for analysis_feature in analysis_features: 56 | 57 | hard_filters[analysis_feature] = {} 58 | 59 | positive_sample_set_description = positive_sample_set[analysis_feature].describe() 60 | 61 | positive_output = {} 62 | 63 | for l in labels: 64 | positive_output[l] = positive_sample_set_description[l] 65 | 66 | positive_whis = get_whiskers(positive_sample_set[analysis_feature]) 67 | 
positive_output['0.35%'] = positive_whis[0] 68 | positive_output['99.65%'] = positive_whis[1] 69 | 70 | ############ 71 | 72 | negative_sample_set_description = negative_sample_set[analysis_feature].describe() 73 | 74 | negative_output = {} 75 | 76 | for l in labels: 77 | negative_output[l] = negative_sample_set_description[l] 78 | 79 | negative_whis = get_whiskers(negative_sample_set[analysis_feature]) 80 | negative_output['0.35%'] = negative_whis[0] 81 | negative_output['99.65%'] = negative_whis[1] 82 | 83 | NU = negative_output['99.65%'] 84 | NL = negative_output['0.35%'] 85 | PU = positive_output['99.65%'] 86 | PL = positive_output['0.35%'] 87 | 88 | if NU == PU and NL == PL: 89 | hard_filters[analysis_feature]['filter_type'] = 'equal' 90 | hard_filters[analysis_feature]['accept_zone'] = [] 91 | hard_filters[analysis_feature]['reject_zone'] = [] 92 | hard_filters[analysis_feature]['unsure_zone'] = [[NL, NU]] 93 | elif NU >= PU and NL <= PL: 94 | hard_filters[analysis_feature]['filter_type'] = 'contain' 95 | hard_filters[analysis_feature]['accept_zone'] = [] 96 | hard_filters[analysis_feature]['reject_zone'] = [[NL, PL], [PU, NU]] 97 | hard_filters[analysis_feature]['unsure_zone'] = [[PL, PU]] 98 | elif NU < PU and NU > PL and NL < PL: 99 | hard_filters[analysis_feature]['filter_type'] = 'intersect' 100 | hard_filters[analysis_feature]['accept_zone'] = [[NU, PU]] 101 | hard_filters[analysis_feature]['reject_zone'] = [[NL, PL]] 102 | hard_filters[analysis_feature]['unsure_zone'] = [[PL, NU]] 103 | elif NL > PL and NL < PU and NU > PU: 104 | hard_filters[analysis_feature]['filter_type'] = 'intersect' 105 | hard_filters[analysis_feature]['accept_zone'] = [[PL, NL]] 106 | hard_filters[analysis_feature]['reject_zone'] = [[PU, NU]] 107 | hard_filters[analysis_feature]['unsure_zone'] = [[NL, PU]] 108 | else: 109 | hard_filters[analysis_feature]['filter_type'] = 'undefine' 110 | hard_filters[analysis_feature]['accept_zone'] = [] 111 | hard_filters[analysis_feature]['reject_zone'] = [] 112 | hard_filters[analysis_feature]['unsure_zone'] = [] 113 | # input('Press ENTER to continue...') 114 | print(hard_filters) 115 | 116 | print('start testing...') 117 | 118 | # Test data 119 | 120 | def filter(feature_value, filters): 121 | 122 | feature_value = float(feature_value) 123 | 124 | possibility = 0.5 125 | 126 | if len(filters['accept_zone']) != 0: 127 | for r in filters['accept_zone']: 128 | if feature_value >= float(r[0]) and feature_value <= float(r[1]): 129 | possibility = 1 130 | return possibility 131 | 132 | if len(filters['reject_zone']) != 0: 133 | for r in filters['reject_zone']: 134 | if feature_value >= float(r[0]) and feature_value <= float(r[1]): 135 | possibility = 0 136 | return possibility 137 | 138 | return possibility 139 | 140 | if dataset == 'split': 141 | g_output_dir = './output/final/split/' 142 | output_csv_path = g_output_dir + 'split_810_test_result.csv' 143 | 144 | g_test_data_dir = './data/final/split/' 145 | test_data_csv_path = g_test_data_dir + 'feature_810_all.csv' 146 | 147 | output_csv_header = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'hard_pred_label', 'hard_pred_pos'] 148 | elif dataset == 'non-split': 149 | g_output_dir = './output/final/non_split/' 150 | output_csv_path = g_output_dir + 'non_split_test_result.csv' 151 | 152 | g_test_data_dir = './data/final/non_split/' 153 | 
test_data_csv_path = g_test_data_dir + 'feature_test_all.csv' 154 | 155 | output_csv_header = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'hard_pred_label', 'hard_pred_pos'] 156 | else: 157 | print('No dataset is selected.') 158 | exit() 159 | 160 | with open(output_csv_path, 'a') as output_csv_file: 161 | writer = csv.DictWriter(output_csv_file, fieldnames=output_csv_header) 162 | writer.writeheader() 163 | output_csv_file.close() 164 | 165 | with open(test_data_csv_path, newline='') as test_data_csv_file: 166 | reader = csv.DictReader(test_data_csv_file) 167 | for row in reader: 168 | predict_label = 0 169 | predict_possibility = 0 170 | 171 | total_possibility = 0 172 | 173 | test_result = {} 174 | 175 | test_result['id'] = row['id'] 176 | if dataset == 'split': 177 | test_result['location'] = row['location'] 178 | test_result['image'] = row['image'] 179 | test_result['pole'] = row['pole'] 180 | test_result['label'] = row['label'] 181 | 182 | for analysis_feature in analysis_features: 183 | test_result[analysis_feature] = filter(row[analysis_feature], hard_filters[analysis_feature]) 184 | if test_result[analysis_feature] == 1: 185 | predict_label = 1 186 | 187 | total_possibility += test_result[analysis_feature] 188 | # input('Press ENTER to continue...') 189 | 190 | test_result['hard_pred_label'] = predict_label 191 | 192 | if predict_label == 1: 193 | test_result['hard_pred_pos'] = 1 194 | else: 195 | total_possibility += float(row['pole']) / 2 196 | test_result['hard_pred_pos'] = total_possibility / number_of_features 197 | 198 | with open(output_csv_path, 'a') as output_csv_file: 199 | writer = csv.writer(output_csv_file) 200 | if dataset == 'split': 201 | writer.writerow([ test_result['id'], test_result['location'], test_result['image'], test_result['size'], test_result['pole'], test_result['mean'], test_result['stddev'], test_result['b_mean'], test_result['g_mean'], test_result['r_mean'], test_result['b_stddev'], test_result['g_stddev'], test_result['r_stddev'], test_result['square'], test_result['ratiowh'], test_result['ratioarea'], test_result['approxlen'], test_result['numangle'], test_result['numangle90'], test_result['numangle70'], test_result['label'], test_result['hard_pred_label'], test_result['hard_pred_pos']]) 202 | if dataset == 'non-split': 203 | writer.writerow([ test_result['id'], test_result['image'], test_result['size'], test_result['pole'], test_result['mean'], test_result['stddev'], test_result['b_mean'], test_result['g_mean'], test_result['r_mean'], test_result['b_stddev'], test_result['g_stddev'], test_result['r_stddev'], test_result['square'], test_result['ratiowh'], test_result['ratioarea'], test_result['approxlen'], test_result['numangle'], test_result['numangle90'], test_result['numangle70'], test_result['label'], test_result['hard_pred_label'], test_result['hard_pred_pos']]) 204 | output_csv_file.close() 205 | 206 | test_data_csv_file.close() 207 | 208 | print('finished') 209 | 210 | 211 | -------------------------------------------------------------------------------- /models/thresholding/thresholding_model_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | 6 | import csv 7 | 8 | # dataset = 'split' 9 | dataset = 'non-split' 10 | 11 | 
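# Editor's note (worked example, not part of the original pipeline): the hard filters generated below
# compare the boxplot whiskers of the positive (P) and negative (N) samples for each feature. With
# hypothetical whiskers PL = 0.2, PU = 0.9 for panel contours and NL = 0.0, NU = 0.5 for non-panel
# contours, the feature falls into the 'intersect' branch further down:
# PL, PU, NL, NU = 0.2, 0.9, 0.0, 0.5
# assert NU < PU and NU > PL and NL < PL   # the 'intersect' condition used below
# accept_zone, reject_zone, unsure_zone = [[NU, PU]], [[NL, PL]], [[PL, NU]]
# i.e. values in (0.5, 0.9] are accepted, values in [0.0, 0.2) are rejected, and the overlap [0.2, 0.5] stays unsure.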
12 | if dataset == 'split': 13 | col_names = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] 14 | #split training data path 15 | training_data_csv_path = "./feature_17_all.csv" 16 | 17 | g_outputDir = './final/split/' 18 | csv_path = g_outputDir + 'split_data_hard_filters.csv' 19 | elif dataset == 'non-split': 20 | col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] 21 | # non-split training data path 22 | training_data_csv_path = "./final/non_split/feature_train_all.csv" 23 | 24 | g_outputDir = './output/final/non_split/' 25 | csv_path = g_outputDir + 'non_split_data_hard_filters.csv' 26 | else: 27 | print('No dataset is selected.') 28 | exit() 29 | 30 | data = pd.read_csv(training_data_csv_path, names=col_names) 31 | data = data.dropna() 32 | 33 | positive_sample_set = data[data['label'] == 1.0] 34 | negative_sample_set = data[data['label'] == 0.0] 35 | 36 | analysis_features = ['size', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] 37 | # analysis_features = ['size'] 38 | 39 | labels = ['mean', 'std', 'min', 'max', '50%', '25%','75%'] 40 | 41 | def get_whiskers(feature_array): 42 | Q1, median, Q3 = np.percentile(np.asarray(feature_array), [25, 50, 75]) 43 | 44 | IQR = Q3 - Q1 45 | 46 | loval = Q1 - 1.5 * IQR 47 | hival = Q3 + 1.5 * IQR 48 | 49 | upper_wisk_set = np.compress(feature_array <= hival, feature_array) 50 | lower_wisk_set = np.compress(feature_array >= loval, feature_array) 51 | upper_wisk = np.max(upper_wisk_set) 52 | lower_wisk = np.min(lower_wisk_set) 53 | 54 | return [lower_wisk, upper_wisk] 55 | 56 | csv_header = ['feature', 'filter_type', 'accept_zone', 'reject_zone', 'unsure_zone'] 57 | with open(csv_path, 'a') as csv_file: 58 | writer = csv.DictWriter(csv_file, fieldnames=csv_header) 59 | writer.writeheader() 60 | csv_file.close() 61 | 62 | hard_filters = {} 63 | 64 | for analysis_feature in analysis_features: 65 | 66 | hard_filters[analysis_feature] = {} 67 | 68 | positive_sample_set_description = positive_sample_set[analysis_feature].describe() 69 | print('positive_sample_set:') 70 | 71 | positive_output = {} 72 | 73 | for l in labels: 74 | positive_output[l] = positive_sample_set_description[l] 75 | 76 | positive_whis = get_whiskers(positive_sample_set[analysis_feature]) 77 | positive_output['0.35%'] = positive_whis[0] 78 | positive_output['99.65%'] = positive_whis[1] 79 | 80 | print(positive_output) 81 | 82 | ############ 83 | 84 | negative_sample_set_description = negative_sample_set[analysis_feature].describe() 85 | print('negative_sample_set:') 86 | 87 | negative_output = {} 88 | 89 | for l in labels: 90 | negative_output[l] = negative_sample_set_description[l] 91 | 92 | negative_whis = get_whiskers(negative_sample_set[analysis_feature]) 93 | negative_output['0.35%'] = negative_whis[0] 94 | negative_output['99.65%'] = negative_whis[1] 95 | 96 | print(negative_output) 97 | 98 | NU = negative_output['99.65%'] 99 | NL = negative_output['0.35%'] 100 | PU = positive_output['99.65%'] 101 | PL = positive_output['0.35%'] 102 | 103 | if NU == PU and NL == PL: 
104 | hard_filters[analysis_feature]['filter_type'] = 'equal' 105 | hard_filters[analysis_feature]['accept_zone'] = [] 106 | hard_filters[analysis_feature]['reject_zone'] = [] 107 | hard_filters[analysis_feature]['unsure_zone'] = [[NL, NU]] 108 | elif NU >= PU and NL <= PL: 109 | hard_filters[analysis_feature]['filter_type'] = 'contain' 110 | hard_filters[analysis_feature]['accept_zone'] = [] 111 | hard_filters[analysis_feature]['reject_zone'] = [[NL, PL], [PU, NU]] 112 | hard_filters[analysis_feature]['unsure_zone'] = [[PL, PU]] 113 | elif NU < PU and NU > PL and NL < PL: 114 | hard_filters[analysis_feature]['filter_type'] = 'intersect-1over0' 115 | hard_filters[analysis_feature]['accept_zone'] = [[NU, PU]] 116 | hard_filters[analysis_feature]['reject_zone'] = [[NL, PL]] 117 | hard_filters[analysis_feature]['unsure_zone'] = [[PL, NU]] 118 | elif NL > PL and NL < PU and NU > PU: 119 | hard_filters[analysis_feature]['filter_type'] = 'intersect-0over1' 120 | hard_filters[analysis_feature]['accept_zone'] = [[PL, NL]] 121 | hard_filters[analysis_feature]['reject_zone'] = [[PU, NU]] 122 | hard_filters[analysis_feature]['unsure_zone'] = [[NL, PU]] 123 | else: 124 | hard_filters[analysis_feature]['filter_type'] = 'undefine' 125 | hard_filters[analysis_feature]['accept_zone'] = [] 126 | hard_filters[analysis_feature]['reject_zone'] = [] 127 | hard_filters[analysis_feature]['unsure_zone'] = [] 128 | 129 | with open(csv_path, 'a') as csv_file: 130 | writer = csv.writer(csv_file) 131 | writer.writerow([analysis_feature, str(hard_filters[analysis_feature]['filter_type']), str(hard_filters[analysis_feature]['accept_zone']), str(hard_filters[analysis_feature]['reject_zone']), str(hard_filters[analysis_feature]['unsure_zone'])]) 132 | csv_file.close() 133 | 134 | print(hard_filters) 135 | 136 | 137 | # input('Press ENTER to continue...') 138 | 139 | 140 | -------------------------------------------------------------------------------- /models/vgg_process/data_preparation.py: -------------------------------------------------------------------------------- 1 | # Create dataset for panel and nopanel 2 | import os 3 | import cv2 4 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 5 | import glob as gb 6 | original_dataset_dir = './location17/' 7 | 8 | base_dir = './split/' 9 | if not os.path.exists(base_dir): 10 | os.mkdir(base_dir) 11 | 12 | # Create directories 13 | train_dir = os.path.join(base_dir,'train/') 14 | if not os.path.exists(train_dir): 15 | os.mkdir(train_dir) 16 | validation_dir = os.path.join(base_dir,'validation/') 17 | if not os.path.exists(validation_dir): 18 | os.mkdir(validation_dir) 19 | # test_dir = os.path.join(base_dir,'test/') 20 | # if not os.path.exists(test_dir): 21 | # os.mkdir(test_dir) 22 | 23 | train_panel_dir = os.path.join(train_dir,'panel/') 24 | if not os.path.exists(train_panel_dir): 25 | os.mkdir(train_panel_dir) 26 | 27 | train_nopanel_dir = os.path.join(train_dir,'nopanel/') 28 | if not os.path.exists(train_nopanel_dir): 29 | os.mkdir(train_nopanel_dir) 30 | 31 | validation_panel_dir = os.path.join(validation_dir,'panel/') 32 | if not os.path.exists(validation_panel_dir): 33 | os.mkdir(validation_panel_dir) 34 | 35 | validation_nopanel_dir = os.path.join(validation_dir, 'nopanel/') 36 | if not os.path.exists(validation_nopanel_dir): 37 | os.mkdir(validation_nopanel_dir) 38 | 39 | 40 | num = 0 41 | img_path = gb.glob("./panel_samesize/*.png") 42 | for path in img_path: 43 | img_name = path.split("/")[-1] 44 | 45 | img = cv2.imread(path) 46 | # 0,1,2,3,4,5,6, 47 | if ((num % 
10) < 7): 48 | cv2.imwrite(os.path.join(train_panel_dir + img_name),img) 49 | # elif ((num % 10) > 6): 50 | # pass 51 | # cv2.imwrite(os.path.join(test_panel_dir +str(1) + img_name),img) 52 | else: 53 | cv2.imwrite(os.path.join(validation_panel_dir + img_name),img) 54 | num = num + 1 55 | num = 0 56 | img_path = gb.glob("./nopanel_undersample/*.png") 57 | for path in img_path: 58 | img_name = path.split("/")[-1] 59 | 60 | img = cv2.imread(path) 61 | if ((num % 10) < 7): 62 | cv2.imwrite(os.path.join(train_nopanel_dir +img_name),img) 63 | # elif ((num % 10) > 6): 64 | # cv2.imwrite(os.path.join(test_nopanel_dir +img_name),img) 65 | else: 66 | cv2.imwrite(os.path.join(validation_nopanel_dir +img_name),img) 67 | num = num + 1 68 | # Sanity checks 69 | print('total training panel images:', len(os.listdir(train_panel_dir))) 70 | print('total training nopanel images:', len(os.listdir(train_nopanel_dir))) 71 | print('total validation panel images:', len(os.listdir(validation_panel_dir))) 72 | print('total validation nopanel images:', len(os.listdir(validation_nopanel_dir))) 73 | -------------------------------------------------------------------------------- /models/vgg_process/metrics.py: -------------------------------------------------------------------------------- 1 | import math 2 | import csv 3 | 4 | 5 | def metric(panel_panel, panel_nopanel,nopanel_panel,nopanel_nopanel): 6 | metric = {} 7 | TP = panel_panel 8 | FN = panel_nopanel 9 | FP = nopanel_panel 10 | TN = nopanel_nopanel 11 | ACCURACY = float((TP + TN)/(TP + FP + FN + TN)) 12 | PRECISION = float(TP/(TP + FP)) 13 | RECALL = float(TP/(TP + FN)) 14 | F1 = float(2*PRECISION*RECALL/(PRECISION + RECALL)) 15 | MCC = float((TP * TN - FP * FN)/ math.sqrt((TP + FP) * (FN + TN) * (FP + TN) * (TP + FN))) 16 | SPECIFICITY = float(TN/(TN + FP)) 17 | metric['TP'] = float(TP/(TP + FN)) 18 | metric['FN'] = float(FN /(TP + FN)) 19 | metric['TN'] = float(TN /(TN + FP)) 20 | metric['FP'] =float(FP /(TN + FP)) 21 | metric['ACCURACY'] = ACCURACY 22 | metric['PRECISION'] =PRECISION 23 | metric['RECALL']= RECALL 24 | metric['F1'] = F1 25 | metric['MCC'] = MCC 26 | metric['SPECIFICITY'] = SPECIFICITY 27 | metric['description'] = 'vgg pure nosplit' 28 | print(metric) 29 | csvpath = './solarpanel/svm/metric.csv' 30 | with open(csvpath, 'a') as csvfile: 31 | writer = csv.writer(csvfile) 32 | writer.writerow([metric['description'],metric['TP'],metric['FN'],metric['TN'],metric['FP'],metric['ACCURACY'],metric['PRECISION'],metric['RECALL'],metric['F1'],metric['MCC'],metric['SPECIFICITY']]) 33 | csvfile.close() 34 | 35 | # call function by the number panel_panel, panel_nopanel, nopanel_panel,nopanel_nopanel 36 | # for exmaple 37 | metric(603,276,8671,15396) -------------------------------------------------------------------------------- /models/vgg_process/train_validation.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import cv2 3 | 4 | csvpath_all = './feature_test.csv' 5 | with open(csvpath_all, 'a') as csvfile: 6 | myFields = ['id','image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label'] 7 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 8 | writer.writeheader() 9 | csvfile.close() 10 | 11 | csv_path = './vgg_predict-Copy2.csv' 12 | with open(csv_path, newline='') as csv_file: 13 | reader = csv.DictReader(csv_file) 14 | for row in reader: 15 | contour = row 16 | with open(csvpath_all, 'a') as csvfile: 17 
| writer = csv.writer(csvfile) 18 | writer.writerow([contour['id'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['prediction'],contour['prediction_class'],contour['label']]) 19 | csvfile.close() 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /models/vgg_process/vgg_images_test.py: -------------------------------------------------------------------------------- 1 | # this file is to test the vgg model 2 | import sys 3 | import os 4 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 5 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 6 | from keras.preprocessing.image import ImageDataGenerator 7 | import csv 8 | import numpy as np 9 | import math 10 | import cv2 11 | import tensorflow as tf 12 | import glob as gb 13 | import time 14 | import os 15 | import timeit 16 | 17 | start = timeit.default_timer() 18 | 19 | 20 | 21 | CATEGORIES = ["panel", "nopanel"] 22 | 23 | # Input dirs 24 | 25 | model_path = './final/split/' 26 | path = model_path 27 | model = tf.keras.models.load_model(os.path.join(path,'20191014-173338.hdf5')) 28 | 29 | panel_panel = 0 30 | panel_nopanel = 0 31 | nopanel_panel = 0 32 | nopanel_nopanel = 0 33 | # test the panel result 34 | panel_img_path = gb.glob("./location17/panel/*png") 35 | nopanel_img_path = gb.glob(".//location17/nopanel/*png") 36 | 37 | 38 | contour = {} 39 | csvpath = './location17/vgg_predict.csv' 40 | with open(csvpath, 'a') as csvfile: 41 | myFields = ['id','prediction','prediction_class','label'] 42 | writer = csv.DictWriter(csvfile, fieldnames=myFields) 43 | writer.writeheader() 44 | csvfile.close() 45 | 46 | num = 0 47 | contour = {} 48 | for path in panel_img_path: 49 | 50 | detected_path = path.split("/")[-1] 51 | contour['id'] = detected_path.split(".")[0] 52 | img = cv2.imread(path) 53 | # print(img.shape) 54 | IMG_SIZE = 150 55 | img1 = cv2.resize(img, (IMG_SIZE, IMG_SIZE)) 56 | testimg = (img1.reshape(-1, IMG_SIZE, IMG_SIZE, 3)).astype('int32')/255 57 | prediction_class = model.predict_classes(testimg) 58 | prediction = model.predict(testimg) 59 | contour['prediction'] = prediction[0][0] 60 | contour['prediction_class'] = prediction_class[0][0] 61 | contour['label'] = 1 62 | if ((prediction_class[0][0]) == 1): 63 | panel_panel = panel_panel + 1 64 | else: 65 | panel_nopanel = panel_nopanel + 1 66 | 67 | with open(csvpath, 'a') as csvfile: 68 | writer = csv.writer(csvfile) 69 | writer.writerow([contour['id'],contour['prediction'],contour['prediction_class'],contour['label']]) 70 | csvfile.close() 71 | 72 | 73 | TP = panel_panel 74 | FN = panel_nopanel 75 | # test no panel result 76 | 77 | for path in nopanel_img_path: 78 | detected_path = path.split("/")[-1] 79 | contour['id'] = detected_path.split(".")[0] 80 | img = cv2.imread(path) 81 | IMG_SIZE = 150 82 | img1 = cv2.resize(img, (IMG_SIZE, IMG_SIZE)) 83 | testimg = (img1.reshape(-1, IMG_SIZE, IMG_SIZE, 3)).astype('int32')/255 84 | prediction_class = model.predict_classes(testimg) 85 | prediction = model.predict(testimg) 86 | contour['prediction'] = prediction[0][0] 87 | contour['prediction_class'] = prediction_class[0][0] 88 | contour['label'] = 0 89 | with open(csvpath, 'a') as csvfile: 90 | writer = csv.writer(csvfile) 91 | writer.writerow([contour['id'],contour['prediction'],contour['prediction_class'],contour['label']]) 92 | csvfile.close() 93 | 94 | if 
((prediction_class[0][0]) == 1): 95 | nopanel_panel = nopanel_panel + 1 96 | else: 97 | nopanel_nopanel = nopanel_nopanel + 1 98 | 99 | TN = nopanel_nopanel 100 | FP = nopanel_panel 101 | 102 | stop = timeit.default_timer() 103 | time = {} 104 | time['description'] = 'get vgg prediction on location17' 105 | time['time'] = stop - start 106 | csv_path = './final/time.csv' 107 | with open(csv_path, 'a') as csvfile: 108 | writer = csv.writer(csvfile) 109 | writer.writerow([time['description'],time['time']]) 110 | csvfile.close() 111 | print('Time: ', stop - start) 112 | print(TP, FN,TN ,FP) 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /models/vgg_process/vgg_images_train.py: -------------------------------------------------------------------------------- 1 | # this is used to trian the vgg model to classify panel and nopanel 2 | import keras 3 | import numpy 4 | import os 5 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 6 | import sys 7 | from keras.preprocessing.image import ImageDataGenerator 8 | from keras import optimizers 9 | import keras 10 | from keras import models 11 | from keras import layers 12 | from keras.callbacks import TensorBoard 13 | from keras.applications import VGG16 14 | import datetime 15 | from keras.callbacks import EarlyStopping 16 | from keras.callbacks import ModelCheckpoint 17 | 18 | # data to train vgg model 19 | 20 | # Input dirs 21 | 22 | workspace_dir = './dataset' 23 | 24 | original_dataset_dir = os.path.join(workspace_dir, 'contours') 25 | 26 | train_dir = os.path.join(original_dataset_dir, 'train') 27 | 28 | validation_dir = os.path.join(original_dataset_dir, 'validation') 29 | 30 | train_panel_dir = os.path.join(train_dir, 'panel') 31 | 32 | train_nopanel_dir = os.path.join(train_dir, 'nopanel') 33 | 34 | validation_panel_dir = os.path.join(validation_dir, 'panel') 35 | 36 | validation_nopanel_dir = os.path.join(validation_dir, 'nopanel') 37 | 38 | # Output dirs 39 | 40 | training_model_output_dir = './solar_panel/smalldata/' 41 | 42 | training_log_dir = './solar_panel/smalldata/' 43 | 44 | model_output_dir = './solar_panel/smalldata/' 45 | 46 | # pretrained model imagenet 47 | conv_base = VGG16(weights='imagenet', 48 | include_top=False, 49 | input_shape=(150, 150, 3)) 50 | 51 | NAME = "VGG-16_pretrain_1" 52 | print(NAME) 53 | 54 | # add the last sequential 55 | model = models.Sequential() 56 | model.add(conv_base) 57 | model.add(layers.Flatten()) 58 | model.add(layers.Dense(256, activation='relu')) 59 | model.add(layers.Dense(1, activation='sigmoid')) 60 | 61 | conv_base.trainable = True 62 | 63 | set_trainable = False 64 | 65 | print('trainable weights is :', len(model.trainable_weights)) 66 | 67 | train_datagen = ImageDataGenerator( 68 | rescale=1. / 255, 69 | rotation_range=40, 70 | width_shift_range=0.2, 71 | height_shift_range=0.2, 72 | shear_range=0.2, 73 | zoom_range=0.2, 74 | horizontal_flip=True, 75 | fill_mode='nearest') 76 | 77 | test_datagen = ImageDataGenerator(rescale=1. 
/ 255) 78 | 79 | train_generator = train_datagen.flow_from_directory( 80 | train_dir, 81 | target_size=(150, 150), 82 | batch_size=32, 83 | class_mode='binary') 84 | 85 | validation_generator = test_datagen.flow_from_directory( 86 | validation_dir, 87 | target_size=(150, 150), 88 | batch_size=32, 89 | class_mode='binary') 90 | 91 | 92 | # model.compile(loss='binary_crossentropy', optimizer=optimizers.RMSprop(lr=2e-5), ) 93 | model.compile(loss='binary_crossentropy', optimizer=optimizers.RMSprop(lr=2e-5), metrics=['acc']) 94 | # use checkpointer to stop training early 95 | checkpointer = ModelCheckpoint(filepath = training_model_output_dir + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + ".hdf5", verbose=1, save_best_only=True) 96 | earlystopper = EarlyStopping(monitor='val_loss', patience=20, verbose=1) 97 | log_dir = training_log_dir + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 98 | print (log_dir) 99 | tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=True, write_images=True) 100 | callbacks = [ checkpointer,earlystopper,tensorboard_callback] 101 | 102 | history = model.fit_generator( 103 | train_generator, 104 | samples_per_epoch=1000, 105 | epochs=50, 106 | validation_data=validation_generator, 107 | validation_steps=50, 108 | verbose=2, 109 | callbacks=callbacks) 110 | path = model_output_dir 111 | model.save(os.path.join(path ,'VGG16_pretrain_all.model')) 112 | 113 | print('finish') 114 | 115 | sys.stdout.flush() 116 | 117 | 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.10.0 2 | kiwisolver==1.1.0 3 | matplotlib==3.0.3 4 | numpy==1.17.2 5 | opencv-python==4.1.1.26 6 | pandas==0.25.1 7 | pyparsing==2.4.2 8 | python-dateutil==2.8.0 9 | pytz==2019.3 10 | scipy==1.3.1 11 | seaborn==0.9.0 12 | six==1.12.0 13 | -------------------------------------------------------------------------------- /result_presentation/10location_accuray.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import csv 3 | from sklearn.metrics import confusion_matrix 4 | 5 | df = pd.read_csv("./data/feature_test_all_vgg_svm_linear.csv") 6 | 7 | for i in range(1,11): 8 | data = pd.read_csv('./finaltest/data/10locations/location' + str(i) + '.csv') 9 | y_predict = data.linear_nosplit_class 10 | y_test = data.label 11 | print(confusion_matrix(y_test, y_predict)) 12 | tn, fp, fn, tp = confusion_matrix(y_test, y_predict, labels=[0,1]).ravel() 13 | with open('./finaltest/data/10locations/10location.csv', 'a') as csvfile: 14 | writer = csv.writer(csvfile) 15 | writer.writerow(['location'+str(i),tn,fp,fn,tp]) 16 | csvfile.close() 17 | -------------------------------------------------------------------------------- /result_presentation/contours_extraction.py: -------------------------------------------------------------------------------- 1 | # OpenCV lib 2 | import os 3 | import tensorflow as tf 4 | import cv2 5 | from skimage.segmentation import slic 6 | from skimage import color 7 | from skimage import data 8 | from skimage import io 9 | # Traverse files 10 | import glob as gb 11 | import tensorflow as tf
12 | # Math lib 13 | import numpy as np 14 | import time 15 | import matplotlib.pyplot as plt 16 | import matplotlib.gridspec as gridspec 17 | import math 18 | import csv 19 | 20 | from matplotlib.pyplot import imshow 21 | import matplotlib.pyplot as plt 22 | import matplotlib.image as mpimg 23 | 24 | 25 | def kmeans(img): 26 | # K-means 27 | # Convert image to one dimension data 28 | img_ori = img.copy() 29 | img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) 30 | Z = img.reshape((-1, 3)) 31 | # Z = img.reshape((-1, 3)) 32 | Z = np.float32(Z) 33 | # define criteria, number of clusters(K) and apply kmeans() 34 | criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0) 35 | K = 5 36 | # Run k-means 37 | # ret: compactness 38 | # labels: 39 | # centers: array of centers of clusters 40 | ret, label, center = cv2.kmeans(Z, K, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS) 41 | # Now convert back into uint8, and make original image 42 | center = np.uint8(center) 43 | res = center[label.flatten()] 44 | res2 = res.reshape(img.shape) 45 | res2_gray = cv2.cvtColor(res2, cv2.COLOR_BGR2GRAY) 46 | 47 | hist = res2_gray.ravel() 48 | hist = set(hist) 49 | hist = sorted(hist) 50 | # print(len(hist)) 51 | threshold = [] 52 | tag = [] 53 | tag1 = [] 54 | tag_dilate3 = [] 55 | tag_dilate5 = [] 56 | tag_dilate7 = [] 57 | tag_close3 = [] 58 | tag_close5 = [] 59 | tag_close7 = [] 60 | for i in range(len(hist) - 1): 61 | threshold.append(int(hist[i] / 2 + hist[i + 1] / 2)) 62 | # no dilate , not accurate 63 | kernal3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) 64 | kernal5 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) 65 | kernal7 = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7)) 66 | for j in range(len(hist) - 1): 67 | if j == (len(hist) - 2): 68 | dia = cv2.inRange(res2_gray, threshold[j], 255) 69 | tag.append(dia) 70 | tag_dilate3.append(cv2.dilate(dia, kernal3, iterations=1)) 71 | tag_dilate5.append(cv2.dilate(dia, kernal5, iterations=1)) 72 | tag_dilate7.append(cv2.dilate(dia, kernal7, iterations=1)) 73 | else: 74 | dia = cv2.inRange(res2_gray, threshold[j], threshold[j + 1]) 75 | tag.append(dia) 76 | tag_dilate3.append(cv2.dilate(dia, kernal3, iterations=1)) 77 | tag_dilate5.append(cv2.dilate(dia, kernal5, iterations=1)) 78 | tag_dilate7.append(cv2.dilate(dia, kernal7, iterations=1)) 79 | 80 | for j in range(len(hist) - 1): 81 | if j == (len(hist) - 2): 82 | dia1 = cv2.inRange(res2_gray, threshold[j], 255) 83 | tag1.append(dia1) 84 | 85 | tag_close3.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal3)) 86 | tag_close5.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal5)) 87 | tag_close7.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal7)) 88 | else: 89 | dia1 = cv2.inRange(res2_gray, threshold[j], threshold[j + 1]) 90 | tag1.append(dia1) 91 | tag_close3.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal3)) 92 | tag_close5.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal5)) 93 | tag_close7.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal7)) 94 | 95 | # return(tag,tag_dilate3,tag_close3, tag_dilate5,tag_close5, tag_dilate7, tag_close7 ,hist) 96 | return (tag, hist, tag_close3, tag_dilate5, tag_close5, tag_dilate7, tag_close7, hist) 97 | 98 | 99 | # the kernel number is returned , use kernel 3 temporiarly. 
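# Editor's sketch (hypothetical usage, not in the original file): kmeans() above returns several mask
# variants; index [2] selects the 3x3-closed binary masks (one per quantized gray band), which is what
# main() further down feeds into find_contours(). Kept as comments so the script's behaviour is unchanged.
# img = cv2.imread('./solar_panel/data/panel/3.png')   # sample path taken from main()
# masks = kmeans(img)[2]                               # list of closed binary masks
# _, candidate_contours = find_contours(img, masks)    # defined just below
# print(len(candidate_contours), 'candidate contours before size filtering')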
100 | 101 | # find contours based on kmeans method 102 | def find_contours(img, mask_list): 103 | # Get the area of roof 104 | masks_length = len(mask_list) 105 | cont = [] 106 | for i in range(0, masks_length): 107 | c, h = cv2.findContours(mask_list[i], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 108 | for contour in c: 109 | cont.append(contour) 110 | # cv2.drawContours(img, cont, -1, (0, 0, 255), 2) 111 | # cv2.imshow('img', img) 112 | # cv2.waitKey(0) 113 | return [img, cont] 114 | 115 | 116 | # use size filter 117 | def filter_size(img, contour): 118 | image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 119 | roof_area = cal_roofarea(image_grayscale)[0] 120 | cont = [] 121 | for c in contour: 122 | area = cv2.contourArea(c) 123 | if (area > 0): 124 | ratio = area / roof_area 125 | if ((area > 800) & (ratio < 0.5)): 126 | cont.append(c) 127 | cv2.drawContours(img, cont, -1, (0, 0, 255), 2) 128 | areas = [] 129 | for i, co in enumerate(cont): 130 | areas.append((i, cv2.contourArea(co), co)) 131 | 132 | a2 = sorted(areas, key=lambda d: d[1], reverse=True) 133 | # cv2.drawContours(img, a2, -1, (0, 0, 255), 2) 134 | cv2.imshow('img',img) 135 | cv2.waitKey(0) 136 | cv2.imwrite('./solar_panel/show/47.png',img) 137 | return [img, a2] 138 | 139 | 140 | # calculate the roof area so we can remove a part of the contours 141 | def cal_roofarea(image): 142 | black = cv2.threshold(image, 0, 255, 0)[1] 143 | contours, hierarchy = cv2.findContours(black, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 144 | # cv2.drawContours(img, contours, -1, (255, 0, 0), 2) 145 | area = [cv2.contourArea(c) for c in contours] 146 | roof_index = np.argmax(area) 147 | roof_cnt = contours[roof_index] 148 | # contourArea will return the wrong value if the contours are self-intersections 149 | roof_area = cv2.contourArea(roof_cnt) 150 | # print('roof area = '+ str(roof_area)) 151 | return (roof_area, roof_cnt) 152 | 153 | 154 | # calculate the mean pixel value in the contours 155 | def getContourStat(img, contour): 156 | mask = np.zeros(img.shape, dtype="uint8") 157 | cv2.drawContours(mask, [contour], -1, 255, -1) 158 | mean, stddev = cv2.meanStdDev(img, mask=mask) 159 | return mean, stddev 160 | 161 | 162 | # use to show the result of kmeans 163 | 164 | def get_mask(img, mask_list): 165 | masks_length = len(mask_list) 166 | mask_color = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 255), (128, 128, 128), (0, 0, 0)] 167 | for i in range(0, masks_length): 168 | img[mask_list[i] != 0] = mask_color[i] 169 | return img 170 | 171 | 172 | def pole(img, contour): 173 | ori_img = img.copy() 174 | image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 175 | cont = cal_roofarea(image_grayscale)[1] 176 | cv2.drawContours(ori_img, cont, -1, (255, 0, 0), 3) 177 | # print(len(contour)) 178 | contour_res = [] 179 | back = 1 180 | cnt = contour 181 | leftmost = tuple(cnt[cnt[:, :, 0].argmin()][0]) 182 | rightmost = tuple(cnt[cnt[:, :, 0].argmax()][0]) 183 | topmost = tuple(cnt[cnt[:, :, 1].argmin()][0]) 184 | bottommost = tuple(cnt[cnt[:, :, 1].argmax()][0]) 185 | pole = [leftmost, rightmost, topmost, bottommost] 186 | for point in pole: 187 | # check the distance with contours, biggest contour 188 | # when it is negative, means the point is outside the contours 189 | dist = cv2.pointPolygonTest(cont, point, True) 190 | # print(dist) 191 | if (dist <= 0): 192 | back = 0 193 | else: 194 | pass 195 | 196 | return (ori_img, contour, back) 197 | 198 | 199 | def rotate_rectangle(img_name, img, contour): 200 | shape = {} 201 | shape['id'] 
= img_name 202 | # for c in contour: 203 | c = contour 204 | 205 | area = cv2.contourArea(c) 206 | x, y, w, h = cv2.boundingRect(c) 207 | ratiowh = min(float(w / h), float(h / w)) 208 | shape['ratiowh'] = ratiowh 209 | 210 | ratioarea = float(area / (w * h)) 211 | shape['ratioarea'] = ratioarea 212 | 213 | epsilon = 0.01 * cv2.arcLength(c, True) 214 | approx = cv2.approxPolyDP(c, epsilon, True) 215 | 216 | approxlen = len(approx) 217 | shape['approxlen'] = approxlen 218 | 219 | # the original num set to be -1 to be different no operation 220 | num_angle = 0 221 | num_angle90 = -1 222 | num_angle80 = -1 223 | num_angle70 = -1 224 | 225 | mask = np.zeros(img.shape, np.uint8) 226 | cv2.drawContours(mask, [approx], -1, (255, 255, 255), -1) 227 | cv2.drawContours(img, [approx], -1, (255, 255, 255), 2) 228 | # mask = np.concatenate((mask, mask, mask), axis=-1) 229 | gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) 230 | contour_list = [] 231 | ret, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY) 232 | contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 233 | # get the list of contours 234 | for points in contours[0]: 235 | x, y = points.ravel() 236 | contour_list.append([x, y]) 237 | corners = cv2.goodFeaturesToTrack(gray, 50, 0.01, 10) 238 | corners = np.int0(corners) 239 | for i in corners: 240 | x, y = i.ravel() 241 | # decide whether the corner is on the contours 242 | if (cv2.pointPolygonTest(contours[0], (x, y), True) == 0): 243 | center_index = contour_list.index([x, y]) 244 | length = len(contour_list) 245 | # get the point three before, and ignore the end point 246 | a_index = center_index - 5 247 | b_index = center_index + 5 248 | if ((a_index > 0) & (b_index > 0) & (a_index < length) & (b_index < length)): 249 | xa, ya = contour_list[a_index] 250 | xb, yb = contour_list[b_index] 251 | # print(x , y) 252 | # print(xa, ya) 253 | a = math.sqrt((x - xa) * (x - xa) + (y - ya) * (y - ya)) 254 | b = math.sqrt((x - xb) * (x - xb) + (y - yb) * (y - yb)) 255 | c = math.sqrt((xa - xb) * (xa - xb) + (ya - yb) * (ya - yb)) 256 | if ((a > 0) & (b > 0)): 257 | if (((a * a + b * b - c * c) / (2 * a * b)) < 1) & (((a * a + b * b - c * c) / (2 * a * b) > -1)): 258 | angle = math.degrees(math.acos((a * a + b * b - c * c) / (2 * a * b))) 259 | num_angle = num_angle + 1 260 | # print(angle) 261 | if (angle < 90): 262 | num_angle90 = num_angle90 + 1 263 | if (angle < 80): 264 | num_angle80 = num_angle80 + 1 265 | if (angle < 70): 266 | num_angle70 = num_angle70 + 1 267 | cv2.circle(img, (x, y), 5, 255, -1) 268 | 269 | shape['numangle'] = num_angle 270 | shape['numangle90'] = num_angle90 271 | shape['numangle80'] = num_angle80 272 | shape['numangle70'] = num_angle70 273 | # print(shape) 274 | # with open(csv_path, 'a') as csv_file: 275 | # writer = csv.writer(csv_file) 276 | # # writer.writerow(['image_id','size','pole','mean','square']) 277 | # writer.writerow([shape['id'],shape['ratiowh'], shape['ratioarea'],shape['approxlen'],shape['numangle'],shape['numangle90'],shape['numangle80'],shape['numangle70']]) 278 | # # for key, value in contour.items(): 279 | # # writer.writerow([key, value]) 280 | # csv_file.close() 281 | 282 | return (shape) 283 | 284 | 285 | def mean(img, contour): 286 | cont_res = [] 287 | ori_img = img.copy() 288 | 289 | img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) 290 | image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 291 | mean_filter = 0 292 | c = contour 293 | mean = getContourStat(image_grayscale, c)[0] 294 | hist = kmeans(img)[1] 295 
| if (mean[0][0] <= (hist[2] + 5)): 296 | # mean = 1 means panel 297 | mean_filter = 1 298 | 299 | else: 300 | # pass 301 | mean_filter = 0 302 | # print(mean) 303 | # cv2.drawContours(ori_img, cont_res, -1, (0, 0, 255), -1) 304 | return (ori_img, cont_res, mean_filter) 305 | 306 | 307 | def main(): 308 | img_path = gb.glob("./solar_panel/data/panel/3.png") 309 | 310 | # store the information of contours(the label) 311 | for path in img_path: 312 | contour = {} 313 | img_name = path.split("/")[-1] 314 | img_name = img_name.split(".")[0] 315 | # print(img_name) 316 | # original image 317 | img = cv2.imread(path) 318 | # this is to show the contours so we can label right 319 | img_contour = img.copy() 320 | # tag = kmeans(img.copy())[2] 321 | tag = kmeans(img)[2] 322 | # masks = get_mask(img, tag) 323 | # get the contours 324 | img_contours = find_contours(img, tag)[0] 325 | contours = find_contours(img, tag)[1] 326 | # filter: remove the contours that are smaller than one block of a solar panel 327 | img_size = filter_size(img, contours)[0] 328 | contourinfo = filter_size(img, contours)[1] 329 | # contour_num is to tag the contours on the image 330 | contour_num = 0 331 | rank = 0 332 | 333 | for i, area, c in contourinfo: 334 | 335 | vgg_image = img.copy() 336 | mask = np.zeros_like(img) 337 | 338 | img2gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) 339 | 340 | mask = cv2.drawContours(img2gray, [c], 0, (255, 255, 255), -1) 341 | # cv2.imshow('mask', mask) 342 | # cv2.waitKey(0) 343 | # cv2.destroyAllWindows() 344 | # get second masked value (background) mask must be inverted 345 | img_result = cv2.bitwise_or(vgg_image, vgg_image, mask=mask) 346 | # cv2.imshow('img',img_result) 347 | # cv2.waitKey(0) 348 | # cv2.destroyAllWindows() 349 | mask = cv2.bitwise_not(mask) 350 | background = np.zeros_like(img) 351 | # Fill image with color 352 | background[:] = (255, 0, 255) 353 | # background = np.full(img.shape, 255, dtype=np.uint8) 354 | bk = cv2.bitwise_or(background, background, mask=mask) 355 | final = cv2.bitwise_or(img_result, bk) 356 | cv2.imwrite(('./solar_panel/show/' + str(i) + '.png'),final) 357 | 358 | 359 | 360 | 361 | 362 | print('finish') 363 | 364 | 365 | main() 366 | 367 | 368 | 369 | 370 | 371 | 372 | -------------------------------------------------------------------------------- /result_presentation/csv_calculate.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import pandas as pd 3 | import pickle 4 | import numpy as np 5 | import csv 6 | image_panel = [] 7 | image_nopanel = [] 8 | vgg_panel_panel = [] 9 | vgg_panel_nopanel = [] 10 | vgg_nopanel_panel = [] 11 | vgg_nopanel_nopanel = [] 12 | 13 | lr_panel_panel = [] 14 | lr_panel_nopanel = [] 15 | lr_nopanel_panel = [] 16 | lr_nopanel_nopanel = [] 17 | csv_path = './nosplit/test/vgg_predict-Copy2.csv' 18 | with open(csv_path, newline='') as csvfile: 19 | reader = csv.DictReader(csvfile) 20 | for row in reader: 21 | if ((row['label'] == '1') and (row['image'] not in image_panel)): 22 | image_panel.append(row['image']) 23 | if ((row['label'] == '0') and (row['image'] not in image_nopanel)): 24 | image_nopanel.append(row['image']) 25 | 26 | csvfile.close() 27 | print(len(image_panel),len(image_nopanel)) 28 | 29 | with open(csv_path, newline='') as csvfile: 30 | reader = csv.DictReader(csvfile) 31 | for row in reader: 32 | if ((row['prediction_class'] == '1') and
32 | if ((row['prediction_class'] == '1') and (row['image'] not in vgg_panel_panel) and (row['label'] == '1')):
33 | vgg_panel_panel.append(row['image'])
34 | if ((row['prediction_class'] == '0') and (row['image'] not in vgg_panel_nopanel) and (row['label'] == '1')):
35 | vgg_panel_nopanel.append(row['image'])
36 | if ((row['prediction_class'] == '1') and (row['image'] not in vgg_nopanel_panel) and (row['label'] == '0')):
37 | vgg_nopanel_panel.append(row['image'])
38 | if ((row['prediction_class'] == '0') and (row['image'] not in vgg_nopanel_nopanel) and (row['label'] == '0')):
39 | vgg_nopanel_nopanel.append(row['image'])
40 |
41 | csvfile.close()
42 | print(len(vgg_panel_panel), len(vgg_panel_nopanel), len(vgg_nopanel_panel), len(vgg_nopanel_nopanel))
43 |
44 |
45 | with open(csv_path, newline='') as csvfile:
46 | reader = csv.DictReader(csvfile)
47 | for row in reader:
48 | if ((row['lrpredict'] == '1') and (row['image'] not in lr_panel_panel) and (row['label'] == '1')):
49 | lr_panel_panel.append(row['image'])
50 | if ((row['lrpredict'] == '0') and (row['image'] not in lr_panel_nopanel) and (row['label'] == '1')):
51 | lr_panel_nopanel.append(row['image'])
52 | if ((row['lrpredict'] == '1') and (row['image'] not in lr_nopanel_panel) and (row['label'] == '0')):
53 | lr_nopanel_panel.append(row['image'])
54 | if ((row['lrpredict'] == '0') and (row['image'] not in lr_nopanel_nopanel) and (row['label'] == '0')):
55 | lr_nopanel_nopanel.append(row['image'])
56 |
57 | csvfile.close()
58 | print(len(lr_panel_panel), len(lr_panel_nopanel), len(lr_nopanel_panel), len(lr_nopanel_nopanel))
--------------------------------------------------------------------------------
/result_presentation/data_stastics/box_plot.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import seaborn as sns
4 | import matplotlib.pyplot as plt
5 |
6 | # seaborn.boxplot API
7 | # https://seaborn.pydata.org/generated/seaborn.boxplot.html
8 | # Understanding Boxplots
9 | # https://towardsdatascience.com/understanding-boxplots-5e2df7bcbd51
10 |
11 | # col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label']
12 | col_names = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class']
13 |
14 | data = pd.read_csv("./data/final/split/feature_17_all.csv", names=col_names)
15 |
16 | data = data.dropna()
17 |
18 | # print(data[:5])
19 | # print(data.shape)
20 |
22 | g_plot_outputDir = './solarpanel/output/final/split/boxplot/'
26 |
27 | positive_sample_set = data[data['label'] == 1.0]
28 | negative_sample_set = data[data['label'] == 0.0]
29 | # random_sample_set = data[(data['label'] != 0.0) & (data['label'] != 1.0)]
30 |
31 | analysis_features = ['size', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70']
32 | # analysis_features = ['size']
33 |
34 | labels_to_draw = ['25%','75%']
35 |
36 | def draw_label(plot, label_type):
37 | labels = [negative_sample_set_description[label_type], positive_sample_set_description[label_type]]
38 | labels_text = [str(np.round(s, 2)) for s in labels]
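# draw_label places one annotation per class (negative at x position 0, positive at x
# position 1), using the quartile value taken from pandas describe() as both the y
# coordinate and the displayed text.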
39 | 40 | pos = range(len(labels_text)) 41 | 42 | for tick,label in zip(pos, plot.get_xticklabels()): 43 | plot.text( 44 | pos[tick], 45 | labels[tick], 46 | labels_text[tick], 47 | ha='center', 48 | va='center', 49 | fontweight='bold', 50 | size=10, 51 | color='white', 52 | bbox=dict(facecolor='#445A64')) 53 | 54 | def draw_single_label(plot, pos, value): 55 | plot.text( 56 | pos, 57 | value, 58 | str(np.round(value, 2)), 59 | ha='center', 60 | va='center', 61 | fontweight='bold', 62 | size=20, 63 | color='white', 64 | bbox=dict(facecolor='#445A64')) 65 | 66 | def get_whiskers(feature_array): 67 | Q1, median, Q3 = np.percentile(np.asarray(feature_array), [25, 50, 75]) 68 | 69 | IQR = Q3 - Q1 70 | 71 | loval = Q1 - 1.5 * IQR 72 | hival = Q3 + 1.5 * IQR 73 | 74 | upper_wisk_set = np.compress(feature_array <= hival, feature_array) 75 | lower_wisk_set = np.compress(feature_array >= loval, feature_array) 76 | upper_wisk = np.max(upper_wisk_set) 77 | lower_wisk = np.min(lower_wisk_set) 78 | 79 | return [lower_wisk, upper_wisk] 80 | 81 | palette = sns.color_palette(["#e69138", "#3d85c6"]) 82 | 83 | for analysis_feature in analysis_features: 84 | 85 | positive_sample_set_description = positive_sample_set[analysis_feature].describe() 86 | print('positive_sample_set:') 87 | print(positive_sample_set_description) 88 | positive_whis = get_whiskers(positive_sample_set[analysis_feature]) 89 | print(positive_whis[0]) 90 | print(positive_whis[1]) 91 | 92 | negative_sample_set_description = negative_sample_set[analysis_feature].describe() 93 | print('negative_sample_set:') 94 | print(negative_sample_set_description) 95 | negative_whis = get_whiskers(negative_sample_set[analysis_feature]) 96 | print(negative_whis[0]) 97 | print(negative_whis[1]) 98 | 99 | sns.set(font_scale = 2) 100 | 101 | # Generate boxplot 102 | sns_boxplot = sns.boxplot(x='label', y=analysis_feature, data=data, showfliers=False, palette=palette) 103 | # sns_boxplot = sns.boxplot(x='label', y=analysis_feature, data=data) 104 | 105 | for l in labels_to_draw: 106 | draw_single_label(sns_boxplot, 1, positive_sample_set_description[l]) 107 | draw_single_label(sns_boxplot, 0, negative_sample_set_description[l]) 108 | 109 | for l in positive_whis: 110 | draw_single_label(sns_boxplot, 1, l) 111 | 112 | for l in negative_whis: 113 | draw_single_label(sns_boxplot, 0, l) 114 | 115 | sns_boxplot.set_title(analysis_feature+'_distribution_boxplot') 116 | 117 | fig = sns_boxplot.get_figure() 118 | fig.savefig(g_plot_outputDir + analysis_feature + '_boxplot.png') 119 | plt.show() 120 | -------------------------------------------------------------------------------- /result_presentation/data_stastics/distribution_plot.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import pandas as pd 4 | from pandas.plotting import scatter_matrix 5 | import seaborn as sns 6 | import matplotlib.pyplot as plt 7 | 8 | # seaborn.distplot API 9 | # http://seaborn.pydata.org/generated/seaborn.distplot.html 10 | 11 | col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label'] 12 | 13 | data = pd.read_csv("./data/Training_set/location_1_7_all.csv", 14 | names=col_names) 15 | data = data.dropna() 16 | 17 | # print(data[:5]) 18 | # print(data.shape) 19 | 20 | analysis_features = ['size', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] 21 | 22 | 
g_plot_outputDir = './solarpanel/output/location1-7/distributions/' 23 | 24 | positive_sample_set = data[data['label'] == 1.0] 25 | negative_sample_set = data[data['label'] == 0.0] 26 | 27 | for analysis_feature in analysis_features: 28 | 29 | N = max(data[analysis_feature]) 30 | binsize = np.arange(0,N+1,math.ceil(N/100)) 31 | if analysis_feature == 'square' or analysis_feature == 'ratiowh' or analysis_feature == 'ratioarea': 32 | binsize = None 33 | 34 | distplot_labels=['ALL', 'positive_sample_set', 'negative_sample_set'] 35 | 36 | distplot_ked = False 37 | # Generate distplot 38 | # sns_distplot = sns.distplot(data[analysis_feature], kde=distplot_ked, label=distplot_labels[0], bins=binsize); 39 | sns_distplot = sns.distplot(positive_sample_set[analysis_feature], kde=distplot_ked, label=distplot_labels[1], bins=binsize) 40 | sns_distplot = sns.distplot(negative_sample_set[analysis_feature], kde=distplot_ked, label=distplot_labels[2], bins=binsize) 41 | sns_distplot.legend() 42 | 43 | sns_distplot.set_title(analysis_feature+'_distribution', fontsize=30) 44 | fig = sns_distplot.get_figure() 45 | fig.savefig(g_plot_outputDir + analysis_feature + '.png') 46 | plt.show() 47 | 48 | ''' 49 | # Generate distplot for positive_sample_set 50 | sns_distplot = sns.distplot(positive_sample_set[analysis_feature], kde=distplot_ked)#, bins=binsize) 51 | 52 | sns_distplot.set_title(analysis_feature+'_positive_set_distribution') 53 | fig = sns_distplot.get_figure() 54 | fig.savefig(g_plot_outputDir + analysis_feature + '_positive_set_distribution.png') 55 | plt.show() 56 | ''' 57 | 58 | # pd_hist = data.groupby('label')[analysis_feature].hist(alpha=0.4) 59 | # pd_hist = positive_sample_set.hist(column=analysis_features) 60 | # pd_hist = negative_sample_set.hist(column=analysis_features) 61 | 62 | # axis=0 for index, axis=1 for column 63 | # features_only_data = data.drop(['id', 'image'], axis=1) 64 | 65 | # sns_pairplot = sns.pairplot(features_only_data, diag_kind='kde') 66 | 67 | # sns_pairplot.savefig(g_plot_outputDir + 'scatter' + '.png') 68 | -------------------------------------------------------------------------------- /result_presentation/data_stastics/scatter_grid.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import pandas as pd 4 | from pandas.plotting import scatter_matrix 5 | import seaborn as sns 6 | import matplotlib.pyplot as plt 7 | 8 | # seaborn.PairGrid API 9 | # https://seaborn.pydata.org/generated/seaborn.PairGrid.html#seaborn.PairGrid 10 | 11 | col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label'] 12 | 13 | data = pd.read_csv("./solarpanel/data/Training_set/location_1_7_all.csv", 14 | names=col_names) 15 | data = data.dropna() 16 | 17 | g_plot_outputDir = './solarpanel/output/location1-7/scatter/' 18 | 19 | analysis_features = ['size', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] 20 | # analysis_features = ['size', 'mean'] 21 | 22 | palette = sns.color_palette(["#e69138", "#3d85c6"]) 23 | 24 | # sns.set(font_scale = 1.5) 25 | sns.set_context(rc={'axes.labelsize': 25.0, 'xtick.labelsize': 'small', 'ytick.labelsize': 'small', 'axes.linewidth': 0, 'ytick.major.size': 0, 'xtick.major.size': 0}) 26 | # print(sns.plotting_context()) 27 | 28 | sns_pairplot = sns.PairGrid(data, vars=analysis_features, 29 | hue='label', hue_kws={"marker": ["o", "s"]}, 
palette=palette) 30 | sns_pairplot = sns_pairplot.map(plt.scatter, linewidths=1, edgecolor="w", s=40) 31 | # sns_pairplot = sns_pairplot.add_legend() 32 | 33 | plt.subplots_adjust(hspace = 0.01, wspace = 0.01) 34 | sns_pairplot.savefig(g_plot_outputDir + 'scatter_grid' + '.png') 35 | plt.show() -------------------------------------------------------------------------------- /result_presentation/data_stastics/scatter_plot.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import pandas as pd 4 | from pandas.plotting import scatter_matrix 5 | import seaborn as sns 6 | import matplotlib.pyplot as plt 7 | 8 | # seaborn.pairplot API 9 | # https://seaborn.pydata.org/generated/seaborn.pairplot.html 10 | 11 | col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label'] 12 | 13 | data = pd.read_csv("./solarpanel/data/Training_set/location_1_7_all.csv", 14 | names=col_names) 15 | data = data.dropna() 16 | 17 | g_plot_outputDir = './solarpanel/output/location1-7/scatter/' 18 | 19 | analysis_features = ['size', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] 20 | # analysis_features = ['size', 'mean'] 21 | 22 | palette = sns.color_palette(["#e69138", "#3d85c6"]) 23 | 24 | sns.set(font_scale = 1.5) 25 | 26 | sns_pairplot = sns.pairplot(data, vars=analysis_features, 27 | hue='label', markers=["o", "s"], palette=palette, 28 | diag_kind='kde') 29 | 30 | sns_pairplot.savefig(g_plot_outputDir + 'scatter_plot' + '.png') 31 | 32 | plt.show() 33 | -------------------------------------------------------------------------------- /result_presentation/data_stastics/violin_plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | 6 | # seaborn.violinplot API 7 | # https://seaborn.pydata.org/generated/seaborn.violinplot.html 8 | # col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label'] 9 | # col_names = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] 10 | # col_names = ['id', 'location', 'image', 'size', 'pole', 'gray mean', 'gray standard deviation', 'blue mean', 'green mean', 'red mean', 'blue standard deviation', 'green standard deviation', 'red standard deviation', 'square similarity', 'width height ratio', 'area ratio', 'number of curves', 'number of corners', 'number of corners less 90', 'number of corners less 70', 'label', 'vgg_pro', 'vgg_class'] 11 | col_names = ['id', 'location', 'image', 'size', 'pole', 'gray_mean', 'gray_std_deviation', 'blue_mean', 'green_mean', 'red_mean', 'blue_std_deviation', 'green_std_deviation', 'red_std_deviation', 'square_similarity', 'width_height_ratio', 'area_ratio', 'number_of_curves', 'number_of_corners', 'corners_less_90', 'corners_less_70', 'label', 'vgg_pro', 'vgg_class'] 12 | 13 | data = pd.read_csv("./data/final/split/feature_17_all.csv", names=col_names) 14 | 15 | data = data.dropna() 16 | 17 | # print(data[:5]) 18 | # print(data.shape) 19 | 20 | g_plot_outputDir = './output/location1-7/violinplot/' 21 
| 22 | positive_sample_set = data[data['label'] == 1.0] 23 | negative_sample_set = data[data['label'] == 0.0] 24 | # random_sample_set = data[(data['label'] != 0.0) & (data['label'] != 1.0)] 25 | 26 | 27 | analysis_features = ['size', 'gray_mean', 'gray_std_deviation', 'blue_mean', 'green_mean', 'red_mean', 'blue_std_deviation', 'green_std_deviation', 'red_std_deviation', 'square_similarity', 'width_height_ratio', 'area_ratio', 'number_of_curves', 'number_of_corners', 'corners_less_90', 'corners_less_70'] 28 | # analysis_features = ['mean'] 29 | 30 | 31 | labels_to_draw = ['25%','75%'] 32 | 33 | def draw_single_label(plot, pos, value): 34 | plot.text( 35 | pos, 36 | value, 37 | str(np.round(value, 2)), 38 | ha='center', 39 | va='center', 40 | fontweight='bold', 41 | size=30, 42 | color='white', 43 | bbox=dict(facecolor='#445A64') 44 | ) 45 | 46 | def get_whiskers(feature_array): 47 | Q1, median, Q3 = np.percentile(np.asarray(feature_array), [25, 50, 75]) 48 | 49 | IQR = Q3 - Q1 50 | 51 | loval = Q1 - 1.5 * IQR 52 | hival = Q3 + 1.5 * IQR 53 | 54 | upper_wisk_set = np.compress(feature_array <= hival, feature_array) 55 | lower_wisk_set = np.compress(feature_array >= loval, feature_array) 56 | upper_wisk = np.max(upper_wisk_set) 57 | lower_wisk = np.min(lower_wisk_set) 58 | 59 | return [lower_wisk, upper_wisk] 60 | 61 | palette = sns.color_palette(["#e69138", "#3d85c6"]) 62 | 63 | for analysis_feature in analysis_features: 64 | 65 | data_whis = get_whiskers(data[analysis_feature]) 66 | 67 | positive_sample_set_description = positive_sample_set[analysis_feature].describe() 68 | print('positive_sample_set:') 69 | print(positive_sample_set_description) 70 | positive_whis = get_whiskers(positive_sample_set[analysis_feature]) 71 | print(positive_whis[0]) 72 | print(positive_whis[1]) 73 | 74 | negative_sample_set_description = negative_sample_set[analysis_feature].describe() 75 | print('negative_sample_set:') 76 | print(negative_sample_set_description) 77 | negative_whis = get_whiskers(negative_sample_set[analysis_feature]) 78 | print(negative_whis[0]) 79 | print(negative_whis[1]) 80 | 81 | data_to_show = data.loc[(data[analysis_feature] > data_whis[0]) & (data[analysis_feature] < data_whis[1])] 82 | 83 | # Generate boxplot 84 | # sns.set(font_scale = font_scale_value) 85 | # sns.set_context(rc={'xtick.major.size': 6.0, 'ytick.minor.size': 4.0, 'legend.fontsize': 22.0, 'ytick.major.width': 1.25, 'axes.labelsize': 24.0, 'ytick.minor.width': 1.0, 'xtick.minor.width': 1.0, 'font.size': 24.0, 'grid.linewidth': 1.0, 'axes.titlesize': 24.0, 'axes.linewidth': 1.25, 'patch.linewidth': 1.0, 'ytick.labelsize': 22.0, 'xtick.labelsize': 10.0, 'lines.linewidth': 1.5, 'ytick.major.size': 6.0, 'lines.markersize': 6.0, 'xtick.major.width': 1.25, 'xtick.minor.size': 4.0}) 86 | # sns.set_context(rc={'axes.titlesize': 'large', 'grid.linewidth': 0.8, 'lines.markersize': 6.0, 'xtick.major.size': 3.5, 'xtick.major.width': 0.8, 'ytick.major.size': 3.5, 'ytick.minor.width': 0.6, 'axes.linewidth': 0.8, 'xtick.labelsize': 'medium', 'patch.linewidth': 1.0, 'ytick.labelsize': 'medium', 'xtick.minor.size': 2.0, 'font.size': 10.0, 'legend.fontsize': 'medium', 'lines.linewidth': 1.5, 'ytick.minor.size': 2.0, 'xtick.minor.width': 0.6, 'axes.labelsize': 'medium', 'ytick.major.width': 0.8}) 87 | sns.set(rc={'figure.figsize':(10, 6)}) 88 | sns.set_context(rc={'axes.titlesize': 22.0, 'axes.labelsize': 50.0, 'xtick.labelsize': 'small', 'ytick.labelsize': 'small'}) 89 | # print(sns.plotting_context()) 90 | 91 | sns_violinplot = 
sns.violinplot(x='label', y=analysis_feature, data=data_to_show, showfliers=False, split=False, palette=palette)
92 | # sns_boxplot = sns.boxplot(x='label', y=analysis_feature, data=data)
93 | sns.despine(offset=10, trim=True)
94 | for l in labels_to_draw:
95 | draw_single_label(sns_violinplot, 1, positive_sample_set_description[l])
96 | draw_single_label(sns_violinplot, 0, negative_sample_set_description[l])
97 |
98 | for l in positive_whis:
99 | draw_single_label(sns_violinplot, 1, l)
100 |
101 | for l in negative_whis:
102 | draw_single_label(sns_violinplot, 0, l)
103 |
104 | # sns_violinplot.set_title(analysis_feature)
105 |
106 | # hide the x-axis label so only the feature name on the y-axis is shown
107 | sns_violinplot.set_xlabel('')
108 |
109 | fig = sns_violinplot.get_figure()
110 | fig.savefig(g_plot_outputDir + analysis_feature + '_violinplot.png')
111 |
112 | plt.show()
113 | # break
114 |
--------------------------------------------------------------------------------
/result_presentation/draw_pca.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.decomposition import PCA
3 | from sklearn.preprocessing import MinMaxScaler
4 | import matplotlib.pyplot as plt
5 | # %matplotlib inline  (IPython magic; only valid when run inside a notebook)
6 | filepath = './feature_test_all.csv'  # your path here
7 | data = np.genfromtxt(filepath, delimiter=',', dtype='float64')
8 |
9 | scaler = MinMaxScaler(feature_range=[0, 1])
10 | data_rescaled = scaler.fit_transform(data[1:, 3:19])
11 | # Fitting the PCA algorithm with our data
12 | pca = PCA().fit(data_rescaled)
13 | # Plotting the cumulative summation of the explained variance
14 | plt.figure()
15 | plt.plot(np.cumsum(pca.explained_variance_ratio_), linewidth=3)
16 | plt.xlabel('Number of Components', {'size': 14})
17 | plt.ylabel('Variance', {'size': 14})  # for each component
18 | # plt.title('Pulsar Dataset Explained Variance')
19 | plt.tight_layout()
20 | plt.savefig('./finaltest/data/pca.png')
21 | plt.show()
--------------------------------------------------------------------------------
/result_presentation/draw_roc.py:
--------------------------------------------------------------------------------
1 | import pandas
2 | import pandas as pd
3 | import pickle
4 | from sklearn.linear_model import LogisticRegression
5 | from sklearn import metrics
6 | from sklearn import datasets
7 | from sklearn.preprocessing import StandardScaler
8 | import numpy as np
9 | from sklearn.metrics import classification_report, confusion_matrix
10 | import csv
11 | import time
12 | start_time = time.time()
13 | # %matplotlib inline  (IPython magic; only valid when run inside a notebook)
14 |
15 |
16 |
18 | data = pd.read_csv("./output/svmrbftrainprobility.csv")
22 | data = data.dropna()
23 | # feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro']
24 | feature_cols = ['vgg_pro','svmrbfpro']
25 | X = data[feature_cols]
26 |
27 | scaler = StandardScaler()
28 | X = scaler.fit_transform(X)  # Features
29 |
30 | y = data.label  # Target variable
31 |
32 | # from sklearn.model_selection import train_test_split
33 | # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)
34 | X_train = X
35 | y_train = y
36 |
37 | # from sklearn.svm import SVC
38 | # svclassifier = SVC(kernel='rbf',class_weight='balanced')
39 | # model = svclassifier.fit(X_train, y_train)
40 |
41 |
42 | # use logistic regression
43 | from sklearn.linear_model import LogisticRegression
44 | model = LogisticRegression(class_weight='balanced')
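# Hybrid fusion step: the logistic model takes the CNN probability (vgg_pro) and the
# SVM-RBF probability (svmrbfpro) as its two features and learns how to weight them
# into a single fused score; the coefficients printed below show those learned weights.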
45 |
46 | # the model is instantiated above with class_weight='balanced'
47 |
48 | # fit the model with data
49 | model.fit(X_train, y_train)
50 | # from sklearn.externals import joblib
51 | # from joblib import dump, load
52 | # dump(model, 'svmrbfhybrid.joblib')
53 | # model = load('svmrbfhybrid.joblib')
54 | print(model.coef_)
55 | print(model.intercept_)
56 | from sklearn import metrics
57 |
58 |
59 |
60 |
62 | datatest = pd.read_csv("./split/output/svmrbftestpro.csv")
66 | datatest = datatest.dropna()
67 | # feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro']
68 | feature_cols = ['vgg_pro','svmrbfpro']
69 | Xtest = datatest[feature_cols]
70 | scaler = StandardScaler()
71 | Xtest = scaler.fit_transform(Xtest)  # note: the scaler is re-fit on the test features here
72 | ytest = datatest.label  # Target variable
73 | y_predict_vgg = datatest.vgg_pro
74 | y_predict_svm = datatest.svmrbfpro
75 |
76 |
77 |
78 | y_predict = model.predict(Xtest)
79 | y_predict_pro = model.predict_proba(Xtest)
80 | y_predict_pro = y_predict_pro[:, 1]
81 |
82 |
83 |
84 | df = pd.DataFrame(datatest)
85 | df.insert(25, "svm_nosplit_pro", y_predict_pro, True)
86 | df.insert(26, "svm_nosplit_class", y_predict, True)
87 |
89 | export_csv = df.to_csv('./vggsvmlogicalregression2features.csv', index=None)
90 | print(confusion_matrix(ytest, y_predict))
91 | tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel()
92 | print(tn, fp, fn, tp)
93 | with open('./split/result.csv', 'a') as csvfile:
101 | writer = csv.writer(csvfile)
102 | writer.writerow(['', tn, fp, fn, tp])
103 | csvfile.close()
104 | elapsed_time = time.time() - start_time  # renamed so the time module is not shadowed
106 | with open('./split/time.csv', 'a') as csvfile:
110 | writer = csv.writer(csvfile)
111 | writer.writerow(['', elapsed_time])
112 | csvfile.close()
113 |
114 |
115 |
116 | from sklearn.metrics import classification_report, confusion_matrix
117 | from sklearn.metrics import accuracy_score
118 | from sklearn.metrics import cohen_kappa_score
119 | from sklearn import metrics
120 | from sklearn.metrics import precision_recall_curve
121 | from sklearn.metrics import average_precision_score
122 | from sklearn.metrics import matthews_corrcoef
123 | from sklearn.metrics import roc_auc_score
124 | from sklearn.metrics import balanced_accuracy_score
125 | from sklearn.metrics import roc_curve
126 | from matplotlib import pyplot
127 | print(confusion_matrix(ytest, y_predict))
128 | print(classification_report(ytest, y_predict))
129 | print(accuracy_score(ytest, y_predict))
130 | print(balanced_accuracy_score(ytest, y_predict))
131 | print(metrics.precision_score(ytest, y_predict))
132 | print(metrics.recall_score(ytest, y_predict))
133 | print(metrics.f1_score(ytest, y_predict))
134 | print(matthews_corrcoef(ytest, y_predict))
135 | print(roc_auc_score(ytest, y_predict))
136 | print(roc_auc_score(ytest, y_predict_vgg))
137 | print(roc_auc_score(ytest, y_predict_svm))
138 | lr_fpr, lr_tpr, _ = roc_curve(ytest, y_predict_pro)
139 | lr_fpr_vgg, lr_tpr_vgg, _ = roc_curve(ytest, y_predict_vgg)
140 | lr_fpr_svm, lr_tpr_svm, _ = roc_curve(ytest, y_predict_svm)
141 |
142 | # pyplot.plot(lr_fpr, lr_tpr, marker='x', label='Logistic',linewidth=2,linestyle='dashed')
143 | # pyplot.plot(lr_fpr_vgg, lr_tpr_vgg, marker='o', label='vgg')
144 | # pyplot.plot(lr_fpr_svm, lr_tpr_svm, marker='v', label='svm kernel=rbf')
145 |
146 | pyplot.plot(lr_fpr, lr_tpr, label='SolarFinder', linewidth=3, linestyle='-', color='green')
147 | pyplot.plot(lr_fpr_vgg, lr_tpr_vgg, label='Pure CNN', linewidth=3, linestyle=':', color='red')
148 | pyplot.plot(lr_fpr_svm, lr_tpr_svm, label='Pure SVM', linewidth=3, linestyle='--', color='orange')
149 |
150 | pyplot.xlabel('False Positive Rate', {'size': 14})
151 | pyplot.ylabel('True Positive Rate', {'size': 14})
152 | # show the legend
153 | pyplot.legend()
154 | pyplot.tight_layout()
156 | pyplot.savefig('./finaltest/data/split_roc.png')
160 | # show the plot
161 | pyplot.show()
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
--------------------------------------------------------------------------------
/result_presentation/feature_statistics.py:
--------------------------------------------------------------------------------
1 | # %matplotlib inline
2 | import numpy as np
3 | import pandas as pd
4 | from scipy import stats, integrate
5 | import matplotlib.pyplot as plt
6 | import seaborn as sns
7 | sns.set(color_codes=True)
8 |
10 | # col_names = ['id','image','size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label']
11 | # # load dataset
12 | # data = pd.read_csv("./mean.csv", names=col_names)
13 | # data = data.dropna()
14 |
15 | # df = pd.DataFrame(data, columns=["size",'label'])
16 | # sns.jointplot(x="size", y="label", data=df)
17 | # plt.savefig("out.png")
18 | # mean, cov = [0, 1], [(1, .5), (.5, 1)]
19 | # data = np.random.multivariate_normal(mean, cov, 200)
20 | # df = pd.DataFrame(data, columns=["x", "y"])
21 | # sns.barplot(x="x", y="y", data=df)
22 |
23 | import seaborn as sns
24 | sns.set(style="darkgrid")
25 | data = pd.read_csv("./mean.csv")
31 | ax = sns.countplot(x="mean", data=data)
--------------------------------------------------------------------------------
/result_presentation/kmeans_draw.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from matplotlib.image import imread
5 | import pandas as pd
6 | import seaborn as sns
7 | import math
8 | from sklearn.datasets.samples_generator import (make_blobs,
9 | make_circles,
10 | make_moons)
11 | from sklearn.cluster import KMeans, SpectralClustering
12 | from sklearn.preprocessing import StandardScaler
13 | from sklearn.metrics import silhouette_samples, silhouette_score
14 | import matplotlib.pyplot as plt
15 | from matplotlib import style
16 | from sklearn.cluster import KMeans
17 | from sklearn.datasets.samples_generator import make_blobs
18 |
19 | # %matplotlib inline  (IPython magic; only valid when run inside a notebook)
20 | import numpy as np
21 |
22 |
23 |
24 |
26 | img = imread('./finaltest/data/roof_images/28.png')
30 | img_size = img.shape
31 |
32 | print(img_size)
33 | # Reshape the image into a 2-D array of pixels
34 | X = img.reshape(img_size[0] * img_size[1], img_size[2])
35 | print(X.shape)
36 |
37 | cost = []
38 | for i in range(1, 11):
39 | KM = KMeans(n_clusters = i, max_iter = 100)
40 | KM.fit(X)
41 |
42 | # calculates squared error
43 | # for the clustered points
44 | cost.append(KM.inertia_)
45 |
46 | # plot the cost against K values
47 | plt.plot(range(1, 11), cost, color='g', linewidth=3)
48 | # plt.rcParams.update({'font.size': 22})
49 | plt.xlabel("Value of K", {'size': 14})
50 | plt.ylabel("Squared Error (Cost)", {'size': 14})
51 | plt.tight_layout()
53 | plt.savefig("./data/roof_images/square_error28.png")
57 | plt.show()  # display the plot
--------------------------------------------------------------------------------
/tools/solar_labeller/input_data_preparation.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import csv
3 |
4 | def main(argv):
5 | # print ('Number of arguments:', len(argv), 'arguments.')
6 | # print ('Argument List:', str(argv))
7 |
8 | output_dir = '/Users/Aaron/projects/solarpanel/data/rooftop_with_solar_array/'
9 | output_csv_path = output_dir + 'rooftop_positive_dataset.csv'
10 | output_csv_header = ['id', 'location', 'location_id', 'label']
11 |
12 | with open(output_csv_path, 'a') as output_csv_file:
13 | writer = csv.DictWriter(output_csv_file, fieldnames=output_csv_header)
14 | writer.writeheader()
15 | output_csv_file.close()
16 |
17 | origin_rooftop_csv_dir = '/Users/Aaron/projects/solarpanel/data/origin_rooftop_csv/'
18 |
19 | for location_id in range(1,11):
20 | current_origin_rooftop_csv_path = origin_rooftop_csv_dir + 'house' + str(location_id) + '.csv'
21 | print(current_origin_rooftop_csv_path)
22 |
23 | with open(current_origin_rooftop_csv_path, newline='') as current_origin_rooftop_csv_file:
24 | reader = csv.DictReader(current_origin_rooftop_csv_file)
25 | for row in reader:
26 | if row['label'] == '0':
27 | continue
28 | output_row = {}
29 | output_row['id'] = row['id']
30 | output_row['location'] = row['location']
31 | output_row['location_id'] = str(location_id)
32 | output_row['label'] = row['label']
33 |
34 | with open(output_csv_path, 'a') as output_csv_file:
35 | writer = csv.writer(output_csv_file)
36 | writer.writerow([output_row['id'], output_row['location'], output_row['location_id'], output_row['label']])
37 | output_csv_file.close()
38 |
39 | current_origin_rooftop_csv_file.close()
40 |
41 | if __name__ == "__main__":
42 | main(sys.argv[1:])
43 |
--------------------------------------------------------------------------------
/tools/solar_labeller/requirements.txt:
--------------------------------------------------------------------------------
1 | astroid==2.3.3
2 | isort==4.3.21
3 | lazy-object-proxy==1.4.3
4 | mccabe==0.6.1
5 | numpy==1.18.1
6 | opencv-python==4.1.2.30
7 | pylint==2.4.4
8 | six==1.14.0
9 | wrapt==1.11.2
10 |
--------------------------------------------------------------------------------
/tools/solar_labeller/solar_marker.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import sys
4 | import csv
5 |
6 | import os.path as path
7 |
8 | outline_list = []
9 | current_outline = []
10 | current_row = {}
11 |
12 | def save_solar_polygon(solar_polygon):
13 | global output_csv_path
14 | global current_row
15 | print('polygon to save: ' + str(solar_polygon))
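# Each appended row records roof_id, outline_nodes, parameters, groundtruth, and
# solar_outline, where solar_outline is the list of marked outlines (one list of
# clicked (x, y) pixel points per solar array on this rooftop).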
16 | with open(output_csv_path, 'a') as output_csv_file: 17 | output_row = {} 18 | # roof_id, outline_nodes, parameters, groundtruth, solar_outline 19 | output_row['roof_id'] = current_row['roof_id'] 20 | output_row['outline_nodes'] = current_row['outline_nodes'] 21 | output_row['parameters'] = current_row['parameters'] 22 | output_row['groundtruth'] = current_row['groundtruth'] 23 | output_row['solar_outline'] = solar_polygon 24 | 25 | writer = csv.writer(output_csv_file) 26 | writer.writerow([output_row['roof_id'], output_row['outline_nodes'], output_row['parameters'], output_row['groundtruth'], output_row['solar_outline']]) 27 | output_csv_file.close() 28 | print('saved') 29 | 30 | def hotkey(): 31 | global tile_id 32 | global outline_list 33 | global current_outline 34 | global current_row 35 | global total_labeled_counter 36 | 37 | KEY_TRUE = ord('t') 38 | KEY_FALSE = ord('f') 39 | KEY_UNDO = ord('u') 40 | KEY_CLEAN = ord('c') 41 | KEY_NEXT = ord('n') 42 | KEY_SAVE = ord('s') 43 | KEY_QUIT = ord('q') 44 | KEY_INFO = ord('i') 45 | 46 | key = cv2.waitKey(0) 47 | if key == KEY_TRUE: 48 | current_row['groundtruth'] = True 49 | print('*** TRUE: has solar array') 50 | hotkey() 51 | elif key == KEY_FALSE: 52 | current_row['groundtruth'] = False 53 | print('*** FALSE: has NO solar array') 54 | hotkey() 55 | elif key == KEY_UNDO: 56 | print('*** Undo') 57 | if len(current_outline) >= 1: 58 | del current_outline[-1] 59 | print(current_outline) 60 | hotkey() 61 | elif key == KEY_CLEAN: 62 | print('*** Clean') 63 | current_outline = [] 64 | print(current_outline) 65 | hotkey() 66 | elif key == KEY_NEXT: 67 | if len(current_outline) > 0: 68 | outline_list.append(current_outline) 69 | print(outline_list) 70 | print('*** Mark next outline') 71 | current_outline = [] 72 | print(current_outline) 73 | hotkey() 74 | elif key == KEY_SAVE: 75 | print('*** Save') 76 | total_labeled_counter += 1 77 | if len(current_outline) > 0: 78 | outline_list.append(current_outline) 79 | current_outline = [] 80 | # if len(outline_list) <= 0: 81 | # outline_list 82 | if len(outline_list) > 0: 83 | current_row['groundtruth'] = True 84 | save_solar_polygon(outline_list) 85 | cv2.destroyAllWindows() 86 | elif key == KEY_QUIT: 87 | print(f'Last unfinished tile: {tile_id}') 88 | print(f'You have totally labeled {total_labeled_counter} roofs this time.') 89 | print('*** Quit solar marker') 90 | exit() 91 | elif key == KEY_INFO: 92 | print('*** Current Rooftop Info:') 93 | print(current_row) 94 | hotkey() 95 | else: 96 | print('*** Undefined key') 97 | hotkey() 98 | 99 | def onMouse(event, x, y, flags, param): 100 | global current_outline 101 | if event == cv2.EVENT_LBUTTONDOWN: 102 | click_point = (x, y) 103 | current_outline.append(click_point) 104 | print('*** Add point: ' + str(click_point)) 105 | print(current_outline) 106 | 107 | def main(argv): 108 | # print ('Number of arguments:', len(argv), 'arguments.') 109 | 110 | global outline_list 111 | global current_outline 112 | global output_csv_path 113 | global current_row 114 | global tile_id 115 | global total_labeled_counter 116 | 117 | tile_id = str(argv[0]) 118 | 119 | if not(int(tile_id) >= 0 and int(tile_id) < 6400): 120 | print('Error: Tile id is out of range.') 121 | exit() 122 | 123 | workspace_path = '' 124 | 125 | mass_osm_path = f'{workspace_path}data/Mass_osm_information.csv' 126 | 127 | mass_osm_reader = None 128 | 129 | total_labeled_counter = 0 130 | 131 | with open(mass_osm_path, newline='') as mass_osm_file: 132 | mass_osm_reader = 
csv.DictReader(mass_osm_file) 133 | 134 | for row in mass_osm_reader: 135 | number_of_roof = int(row['number_roof']) 136 | 137 | current_mass_osm_tile_id = row['id'] 138 | 139 | if int(current_mass_osm_tile_id) < int(tile_id) or number_of_roof <= 0: 140 | continue 141 | else: 142 | tile_id = current_mass_osm_tile_id 143 | 144 | 145 | print(f'*** Start Labelling tile: {str(tile_id)} ***') 146 | 147 | 148 | output_csv_path = f'{workspace_path}output/{tile_id}_groundtruth.csv' 149 | 150 | rooftop_img_dir = f'{workspace_path}data/rooftop/{tile_id}/' 151 | 152 | rooftop_csv_dir = f'{workspace_path}data/osm_parsed/' 153 | rooftop_csv_path = f'{rooftop_csv_dir}{tile_id}.csv' 154 | 155 | if not path.exists(rooftop_csv_path): 156 | # print(rooftop_csv_path) 157 | print(f'Tile {tile_id} csv file does not exist.') 158 | exit() 159 | 160 | last_row_of_output = None 161 | 162 | continued_work = False 163 | skipped = False 164 | 165 | if path.exists(output_csv_path): 166 | continued_work = True 167 | 168 | # Get las row of output csv file 169 | if continued_work: 170 | with open(output_csv_path, newline='') as output_csv_file: 171 | reader = csv.DictReader(output_csv_file) 172 | for row in reader: 173 | last_row_of_output = row['roof_id'] 174 | output_csv_file.close() 175 | print('This is a continued work.') 176 | 177 | if not continued_work: 178 | output_csv_header = ['roof_id', 'outline_nodes', 'parameters', 'groundtruth', 'solar_outline'] 179 | with open(output_csv_path, 'a') as output_csv_file: 180 | writer = csv.DictWriter(output_csv_file, fieldnames=output_csv_header) 181 | writer.writeheader() 182 | output_csv_file.close() 183 | 184 | print(f'Start labelling tile: {rooftop_csv_path}.') 185 | roof_counter_of_current_tile = 0 186 | with open(rooftop_csv_path, newline='') as rooftop_csv_file: 187 | reader = csv.DictReader(rooftop_csv_file) 188 | 189 | for row in reader: 190 | roof_counter_of_current_tile += 1 191 | 192 | if continued_work and last_row_of_output is not None: 193 | if not skipped: 194 | if last_row_of_output != row['roof_id']: 195 | continue 196 | elif last_row_of_output == row['roof_id']: 197 | skipped = True 198 | continue 199 | 200 | outline_list = [] 201 | current_outline = [] 202 | 203 | current_row = row 204 | current_row['groundtruth'] = False 205 | 206 | rooftop_img_file_name = row['roof_id'] + '.png' 207 | print(f'Labelling {rooftop_img_file_name}... 
({roof_counter_of_current_tile}/{number_of_roof})') 208 | img_path = rooftop_img_dir + rooftop_img_file_name 209 | 210 | if not path.exists(img_path): 211 | print(f'Error: Rooftop image {img_path} is not exist!') 212 | exit() 213 | 214 | img = cv2.imread(img_path) 215 | window_name = row['roof_id'] 216 | cv2.namedWindow(window_name) 217 | cv2.moveWindow(window_name, 20, 20) 218 | cv2.imshow(window_name, img) 219 | 220 | cv2.setMouseCallback(row['roof_id'], onMouse) 221 | 222 | hotkey() 223 | 224 | rooftop_csv_file.close() 225 | mass_osm_file.close() 226 | 227 | if __name__ == "__main__": 228 | main(sys.argv[1:]) -------------------------------------------------------------------------------- /tools/solar_labeller/solar_marker_fixer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import sys 4 | import csv 5 | 6 | import os.path as path 7 | 8 | outline_list = [] 9 | current_outline = [] 10 | current_row = {} 11 | 12 | def save_solar_polygon(solar_polygon): 13 | global output_csv_path 14 | global current_row 15 | print('polygon to save: ' + str(solar_polygon)) 16 | with open(output_csv_path, 'a') as output_csv_file: 17 | output_row = {} 18 | # roof_id, outline_nodes, parameters, groundtruth, solar_outline 19 | output_row['roof_id'] = current_row['roof_id'] 20 | output_row['outline_nodes'] = current_row['outline_nodes'] 21 | output_row['parameters'] = current_row['parameters'] 22 | output_row['groundtruth'] = current_row['groundtruth'] 23 | output_row['solar_outline'] = solar_polygon 24 | 25 | writer = csv.writer(output_csv_file) 26 | writer.writerow([output_row['roof_id'], output_row['outline_nodes'], output_row['parameters'], output_row['groundtruth'], output_row['solar_outline']]) 27 | output_csv_file.close() 28 | print('saved') 29 | 30 | def hotkey(): 31 | global tile_id 32 | global outline_list 33 | global current_outline 34 | global current_row 35 | global total_labeled_counter 36 | 37 | KEY_TRUE = ord('t') 38 | KEY_FALSE = ord('f') 39 | KEY_UNDO = ord('u') 40 | KEY_CLEAN = ord('c') 41 | KEY_NEXT = ord('n') 42 | KEY_SAVE = ord('s') 43 | KEY_QUIT = ord('q') 44 | KEY_INFO = ord('i') 45 | 46 | key = cv2.waitKey(0) 47 | if key == KEY_TRUE: 48 | current_row['groundtruth'] = True 49 | print('*** TRUE: has solar array') 50 | hotkey() 51 | elif key == KEY_FALSE: 52 | current_row['groundtruth'] = False 53 | print('*** FALSE: has NO solar array') 54 | hotkey() 55 | elif key == KEY_UNDO: 56 | print('*** Undo') 57 | if len(current_outline) >= 1: 58 | del current_outline[-1] 59 | print(current_outline) 60 | hotkey() 61 | elif key == KEY_CLEAN: 62 | print('*** Clean') 63 | current_outline = [] 64 | print(current_outline) 65 | hotkey() 66 | elif key == KEY_NEXT: 67 | if len(current_outline) > 0: 68 | outline_list.append(current_outline) 69 | print(outline_list) 70 | print('*** Mark next outline') 71 | current_outline = [] 72 | print(current_outline) 73 | hotkey() 74 | elif key == KEY_SAVE: 75 | print('*** Save') 76 | total_labeled_counter += 1 77 | if len(current_outline) > 0: 78 | outline_list.append(current_outline) 79 | current_outline = [] 80 | # if len(outline_list) <= 0: 81 | # outline_list 82 | if len(outline_list) > 0: 83 | current_row['groundtruth'] = True 84 | save_solar_polygon(outline_list) 85 | cv2.destroyAllWindows() 86 | elif key == KEY_QUIT: 87 | print(f'Last unfinished tile: {tile_id}') 88 | print(f'You have totally labeled {total_labeled_counter} roofs this time.') 89 | print('*** Quit solar marker') 90 | 
exit() 91 | elif key == KEY_INFO: 92 | print('*** Current Rooftop Info:') 93 | print(current_row) 94 | hotkey() 95 | else: 96 | print('*** Undefined key') 97 | hotkey() 98 | 99 | def onMouse(event, x, y, flags, param): 100 | global current_outline 101 | if event == cv2.EVENT_LBUTTONDOWN: 102 | click_point = (x, y) 103 | current_outline.append(click_point) 104 | print('*** Add point: ' + str(click_point)) 105 | print(current_outline) 106 | 107 | def main(argv): 108 | # print ('Number of arguments:', len(argv), 'arguments.') 109 | 110 | global outline_list 111 | global current_outline 112 | global output_csv_path 113 | global current_row 114 | global tile_id 115 | global total_labeled_counter 116 | 117 | workspace_path = '' 118 | 119 | total_labeled_counter = 0 120 | 121 | rooftop_csv_path = f'{workspace_path}data/missing.csv' 122 | 123 | if not path.exists(rooftop_csv_path): 124 | # print(rooftop_csv_path) 125 | print(f'The input csv file does not exist.') 126 | exit() 127 | 128 | # output_csv_header = ['roof_id', 'outline_nodes', 'parameters', 'groundtruth', 'solar_outline'] 129 | # with open(output_csv_path, 'a') as output_csv_file: 130 | # writer = csv.DictWriter(output_csv_file, fieldnames=output_csv_header) 131 | # writer.writeheader() 132 | # output_csv_file.close() 133 | 134 | print(f'Start labelling tile: {rooftop_csv_path}.') 135 | 136 | roof_counter = 0 137 | 138 | with open(rooftop_csv_path, newline='') as rooftop_csv_file: 139 | reader = csv.DictReader(rooftop_csv_file) 140 | 141 | number_of_roof = 546 142 | for row in reader: 143 | print('here') 144 | tile_id = row['tile_id'] 145 | 146 | output_csv_path = f'{workspace_path}output/{tile_id}_groundtruth.csv' 147 | rooftop_img_dir = f'{workspace_path}data/rooftop/{tile_id}/' 148 | 149 | roof_counter += 1 150 | 151 | continued_work = False 152 | skipped = False 153 | 154 | if path.exists(output_csv_path): 155 | continued_work = True 156 | 157 | # Add header to output csv 158 | if not continued_work: 159 | output_csv_header = ['roof_id', 'outline_nodes', 'parameters', 'groundtruth', 'solar_outline'] 160 | with open(output_csv_path, 'a') as output_csv_file: 161 | writer = csv.DictWriter(output_csv_file, fieldnames=output_csv_header) 162 | writer.writeheader() 163 | output_csv_file.close() 164 | 165 | outline_list = [] 166 | current_outline = [] 167 | 168 | current_row = row 169 | current_row['groundtruth'] = False 170 | 171 | rooftop_img_file_name = row['roof_id'] + '.png' 172 | print(f'Labelling {rooftop_img_file_name}... ({roof_counter}/{number_of_roof})') 173 | img_path = rooftop_img_dir + rooftop_img_file_name 174 | 175 | if not path.exists(img_path): 176 | print(f'Error: Rooftop image {img_path} is not exist!') 177 | exit() 178 | 179 | img = cv2.imread(img_path) 180 | window_name = row['roof_id'] 181 | cv2.namedWindow(window_name) 182 | cv2.moveWindow(window_name, 20, 20) 183 | cv2.imshow(window_name, img) 184 | 185 | cv2.setMouseCallback(row['roof_id'], onMouse) 186 | 187 | hotkey() 188 | 189 | rooftop_csv_file.close() 190 | 191 | if __name__ == "__main__": 192 | main(sys.argv[1:]) --------------------------------------------------------------------------------
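Since solar_marker.py and solar_marker_fixer.py serialise each rooftop's marked outlines with str() into the solar_outline column, the per-tile groundtruth CSVs can be parsed back with ast.literal_eval and rasterised into binary masks for downstream evaluation (for example, an IoU comparison against detected contours). The sketch below is a minimal, illustrative example of that round-trip; the file path, the 256x256 mask size, and the helper names are assumptions for demonstration, not part of the repository.

import ast
import csv

import cv2
import numpy as np


def outlines_to_mask(solar_outline_cell, mask_shape=(256, 256)):
    # solar_outline is written with str(), so it parses back into a list of
    # lists of (x, y) tuples with ast.literal_eval.
    outlines = ast.literal_eval(solar_outline_cell)
    mask = np.zeros(mask_shape, dtype=np.uint8)
    for outline in outlines:
        points = np.array(outline, dtype=np.int32)
        if len(points) >= 3:  # need at least three clicks to fill a polygon
            cv2.fillPoly(mask, [points], 255)
    return mask


def iou(mask_a, mask_b):
    # Intersection-over-union between two binary masks.
    intersection = np.logical_and(mask_a > 0, mask_b > 0).sum()
    union = np.logical_or(mask_a > 0, mask_b > 0).sum()
    return float(intersection) / union if union > 0 else 0.0


# Assumed example: one tile's groundtruth file produced by solar_marker.py.
with open('./output/1234_groundtruth.csv', newline='') as groundtruth_file:
    for row in csv.DictReader(groundtruth_file):
        if row['groundtruth'] == 'True' and row['solar_outline']:
            gt_mask = outlines_to_mask(row['solar_outline'])
            print(row['roof_id'], int((gt_mask > 0).sum()), 'solar pixels')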