├── replot_DBrecord.py
├── color_decomposition.py
├── LICENSE
├── final-record.py
├── README.md
├── final_record_func.py
├── object_detection_axes_legend.py
└── posterization.py


/replot_DBrecord.py:
--------------------------------------------------------------------------------
 1 | # DB of emissivity records can be downloaded
 2 | # from https://bit.ly/3NKnmpL
 3 | #
 4 | # unzip the archive, place this file in the
 5 | # same directory and use it to work with any
 6 | # of the DB records.
 7 | 
 8 | import json
 9 | import argparse
10 | import numpy as np
11 | import matplotlib.pyplot as plt
12 | 
# Plot one emissivity record stored as JSON and save the figure next to it.
# The record is read with the keys used below: 'data', 'materials',
# 'authors', 'year', 'doi', 'figure_number', 'geometry_key',
# 'composition_key' and 'color'.
import os

parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fname', default='1.json', type=str, help='Enter the name of the JSON record you want to plot')
args = parser.parse_args()
filename = args.fname

print('NOTE: To open other file, run this script with flag -f and provide the path to the record.')
print('Working with file '+filename)

# The context manager closes the file even when the JSON cannot be parsed.
with open(filename) as f:
    record = json.load(f)

# record['data'] is indexed as rows of (x, y); convert once, then slice columns.
data = np.array(record['data'])
X = data[:, 0]
Y = data[:, 1]
materials = ", ".join(record['materials'])
# str() around figure_number keeps the title working for numeric figure ids too.
title = ('From ' + record['authors'][0] + ' et al., ' + str(record['year'])
         + ' doi=' + record['doi'] + ', FIG ' + str(record['figure_number']) + '\n'
         + record['geometry_key'] + ' ' + record['composition_key']
         + ' made with ' + materials)

# Replace only the real extension; slicing four characters off the end would
# mangle names that do not end in '.json'.
plot_name = os.path.splitext(filename)[0] + '.png'

fig = plt.figure(figsize=(8, 6))
plt.plot(X, Y, color=record['color'])
# Raw string: '\m' is not a valid escape sequence in a normal string literal.
plt.xlabel(r'Wavelength, $\mu m$')
plt.ylabel('Emissivity, a.u.')
plt.title(title)
fig.savefig(plot_name)

print('Check the plot at '+plot_name)


--------------------------------------------------------------------------------
/color_decomposition.py:
--------------------------------------------------------------------------------
 1 | from os import walk
 2 | import os
 3 | import re
 4 | import cv2
 5 | import numpy as np
 6 | from PIL import Image
 7 | from posterization import detect_colors
 8 | from posterization import preprocess, rgb2hex, save_palette, get_matrix, save_cluster, save_json, get_cluster_data, data_score_mult
 9 | 
# Split every source figure in PATH_TO_DIR into colour clusters and save the
# clusters that look like single-valued curves.
PATH_TO_DIR = 'images'

# A colour cluster smaller than this many pixels is ignored.
MIN_CLUSTER_PIXELS = 300
# Minimum data_score_mult() score for a cluster to be saved.
MIN_CURVE_SCORE = 0.66
# Name fragments of files written by the pipeline itself (axis/legend crops and
# the colorcut/cluster/palette images); they must not be treated as input
# figures when the script is run a second time on the same directory.
DERIVED_MARKERS = ('axis', 'Legend', 'colorcut')

# Only the top level of PATH_TO_DIR is scanned.
fnames = []
for (dirpath, dirnames, filenames) in walk(PATH_TO_DIR):
    fnames.extend(filenames)
    break

images = [filename for filename in fnames
          if filename.endswith('.png')
          and not any(marker in filename for marker in DERIVED_MARKERS)]
legends = [filename for filename in fnames
           if 'Legend' in filename and filename.endswith('.json')]

for image_name in images:
    print(image_name)
    image_path = os.path.join(PATH_TO_DIR, image_name)
    # Plain prefix comparison: the image stem is a literal, not a regex pattern.
    legend_prefix = image_name[:-4] + '_Legend'
    legend_names = [os.path.join(PATH_TO_DIR, legend) for legend in legends
                    if legend.startswith(legend_prefix)]
    image_arr = preprocess(image_path, legend_names)
    # Nothing but white left after preprocessing: no coloured curve to extract.
    if not (image_arr < 255).any():
        continue
    Image.fromarray(image_arr).save(image_path[:-4] + '_colorcut.png')
    colorsrgb = detect_colors(image_arr)
    save_palette([colorsrgb], image_path)
    matrix = get_matrix(image_arr, colorsrgb)
    for i, color in enumerate(colorsrgb):
        cluster = get_cluster_data(matrix, i)
        if len(cluster) <= MIN_CLUSTER_PIXELS:
            continue
        if data_score_mult(cluster) > MIN_CURVE_SCORE:
            color_hex = rgb2hex(color)
            save_cluster(cluster, i, color_hex, image_path)
            save_json(cluster, color_hex, i, image_path)
39 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2022, The Regents of the University of
 2 | California, through Lawrence Berkeley National Laboratory (subject
 3 | to receipt of any required approvals from the U.S. Dept. of Energy).
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions
 8 | are met:
 9 | 
10 | (1) Redistributions of source code must retain the above copyright
11 | notice, this list of conditions and the following disclaimer.
12 | 
13 | (2) Redistributions in binary form must reproduce the above
14 | copyright notice, this list of conditions and the following
15 | disclaimer in the documentation and/or other materials provided with
16 | the distribution.
17 | 
18 | (3) Neither the name of the University of California, Lawrence
19 | Berkeley National Laboratory, U.S. Dept. of Energy nor the names of
20 | its contributors may be used to endorse or promote products derived
21 | from this software without specific prior written permission.
22 | 
23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 | POSSIBILITY OF SUCH DAMAGE.
35 | 
36 | You are under no obligation whatsoever to provide any bug fixes,
37 | patches, or upgrades to the features, functionality or performance
38 | of the source code ("Enhancements") to anyone; however, if you
39 | choose to make your Enhancements available either publicly, or
40 | directly to Lawrence Berkeley National Laboratory or its
41 | contributors, without imposing a separate written license agreement
42 | for such Enhancements, then you hereby grant the following license:
43 | a  non-exclusive, royalty-free perpetual license to install, use,
44 | modify, prepare derivative works, incorporate into other computer
45 | software, distribute, and sublicense such enhancements or derivative
46 | works thereof, in binary and source code form.
47 | 


--------------------------------------------------------------------------------
/final-record.py:
--------------------------------------------------------------------------------
 1 | import easyocr
 2 | import cv2
 3 | from easyocr import Reader
 4 | import json
 5 | import numpy as np
 6 | import pandas as pd
 7 | import re
 8 | from os import walk
 9 | import os
10 | import argparse
11 | from final_record_func import get_scaling, save_json
12 | 
# For every colour-cluster JSON in images/, read the tick labels of the matching
# X and Y axis crops with OCR and, when both axes are read confidently, write
# the rescaled curve with save_json().
parser = argparse.ArgumentParser()
parser.add_argument('-ax','--axscale', default='xy', type=str, help='Axes to rescale. Options: x, y, xy')
args = parser.parse_args()
# NOTE(review): scale_mode is parsed but not used anywhere below.
scale_mode = args.axscale

reader = easyocr.Reader(['en'], gpu=True)
allowlist = '0123456789.-'
PATH_TO_DIR = os.getcwd()
print(PATH_TO_DIR)

IMAGES_DIR = 'images'
# Cluster records are named '<figure>_colorcut_cluster_<index>.json'.
CLUSTER_MARKER = '_colorcut_cluster_'
# Outputs of this script also contain 'cluster' and 'json'; they must be skipped
# on a second run, otherwise they are parsed as cluster records.
FINAL_MARKER = 'final_record'

fnames = []
for (dirpath, dirnames, filenames) in walk(os.path.join(PATH_TO_DIR, IMAGES_DIR)):
    fnames.extend(filenames)
    break
imgnames = [filename for filename in fnames if 'png' in filename and 'axis' not in filename and 'Legend' not in filename and 'colorcut' not in filename]
axisnames = [filename for filename in fnames if 'axis' in filename and 'json' in filename]
clusternames = [filename for filename in fnames
                if CLUSTER_MARKER in filename
                and filename.endswith('.json')
                and FINAL_MARKER not in filename]


def _read_json(path):
    """Load a JSON file and close it again, even on a parse error."""
    with open(path) as handle:
        return json.load(handle)


def _first_with_prefix(names, prefix):
    """Return the first name starting with the literal prefix, or None."""
    return next((name for name in names if name.startswith(prefix)), None)


for i, clustername in enumerate(clusternames):
    # Figure stem = everything before the cluster marker (no fixed-width slicing).
    FILE = clustername.rpartition(CLUSTER_MARKER)[0]
    print(FILE)
    cluster = _read_json(os.path.join(IMAGES_DIR, clustername))
    XAXIS = _first_with_prefix(axisnames, FILE + '_X_axis')
    YAXIS = _first_with_prefix(axisnames, FILE + '_Y_axis')
    if XAXIS is None or YAXIS is None:
        # No axis record was written for this figure: nothing to calibrate against.
        print(i, clustername, FILE, 'axis record not found')
        continue
    print(XAXIS, YAXIS)

    x_json_path = os.path.join(IMAGES_DIR, XAXIS)
    print(x_json_path)
    x_axis_json = _read_json(x_json_path)
    x_png_path = os.path.join(IMAGES_DIR, XAXIS[:-4] + 'png')
    x_axis_img = cv2.imread(x_png_path)
    print(x_png_path)
    if x_axis_img is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        print(i, clustername, FILE, 'axis image not readable')
        continue
    [confidence, x_a1, x_box] = get_scaling(x_axis_json, x_axis_img, reader, allowlist, 'x')
    if confidence == 'unconfident':
        print(i, clustername, FILE, confidence)
        continue

    y_json_path = os.path.join(IMAGES_DIR, YAXIS)
    print(y_json_path)
    y_axis_json = _read_json(y_json_path)
    y_png_path = os.path.join(IMAGES_DIR, YAXIS[:-4] + 'png')
    y_axis_img = cv2.imread(y_png_path)
    print(y_png_path)
    if y_axis_img is None:
        print(i, clustername, FILE, 'axis image not readable')
        continue
    [confidence, y_a1, y_box] = get_scaling(y_axis_json, y_axis_img, reader, allowlist, 'y')
    if confidence == 'unconfident':
        continue

    save_json(FILE, clustername, cluster, x_box, x_a1, y_box, y_a1)
    print(i, clustername, FILE, confidence)
68 | 
69 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Installation
  2 | 
  3 | *Automated curve data extraction. Works with colored curves. Cross-platform. Color decomposition with k-means. X units parser. Automatic axes scale recognition.*
  4 | 
  5 | 1. Download and unzip **folder** from https://drive.google.com/file/d/1e0UTKwhgJN9DuD2qYsLcWcKd6WomvRkl/view?usp=sharing
  6 | 
  7 | 2. Download scripts above.
  8 | 
  9 | ### Part I: install packages and TF2 object detection API
 10 | 
 11 | 3. Create an Anaconda environment
 12 | ```
 13 | conda create -n imgrec python=3.9
 14 | conda install -n imgrec pip
 15 | conda activate imgrec
 16 | ```
 17 | 4. Install Tensorflow 2.7
 18 | ```
 19 | pip install TensorFlow==2.7
 20 | ```
 21 | 5. Install protobuf (probably it is already there after previous step; try installation via brew for macOS)
 22 | ```
 23 | pip install protobuf
 24 | ```
 25 | 6. Install some packages
 26 | ```
 27 | pip install easyocr
 28 | pip install opencv-python==4.5.4.60
 29 | ```
 30 | 7. Install TF object detection API
 31 | 
 32 | Create a directory for Tensorflow Models repository. Go to this directory and do
 33 | ```
 34 | git clone https://github.com/tensorflow/models.git
 35 | cd models/research
 36 | protoc object_detection/protos/*.proto --python_out=.
 37 | cp object_detection/packages/tf2/setup.py .
 38 | python -m pip install .
 39 | ```
 40 | 8. Check installation
 41 | ```
 42 | python object_detection/builders/model_builder_tf2_test.py
 43 | ```
 44 | 
 45 | ### Part II: use pre-trained model
 46 | 
 47 | 9. 
 48 | ```
 49 | cd object_detection
 50 | mkdir inference_graph
 51 | ```
 52 | 10. copy the content of the inference_graph from **folder** to the corresponding directory
 53 | 
 54 | 11.
 55 | ``` 
 56 | mkdir training 
 57 | ```
 58 | copy labelmap.pbtxt from **folder** to training/.
 59 | 
 60 | 12. 
 61 | ```
 62 | mkdir images
 63 | ```
 64 | copy file 0.png from **folder** to images/.
 65 | 
 66 | 13. copy file label_map_util_v2.py from **folder** to utils/.
 67 | 
 68 | 14. copy script object_detection_axes_legend.py from above (step 2) to  models/research/object_detection/.
 69 | 
 70 | 15. run this file from models/research/object_detection/ as
 71 | ```
 72 | python object_detection_axes_legend.py
 73 | ```
 74 | You should obtain json and png files for Legend, X axis and Y axis in images/
 75 | 
 76 | ### Part III: run the rest of the project
 77 | 
 78 | 16. copy scripts posterization.py and color_decomposition.py from above (step 2) to models/research/object_detection/.
 79 | 
 80 | 17. Run file color_decomposition.py as
 81 | ```
 82 | python color_decomposition.py
 83 | ```
 84 | You should obtain png of color cut and palette as well as json and png of separate clusters in images/
 85 | 
 86 | 18. Copy scripts final-record.py and final_record_func.py from above (step 2) to models/research/object_detection/.
 87 | 
 88 | 19. Run final-record.py as
 89 | ```
 90 | python final-record.py 
 91 | ```
 92 | You should obtain json files and png images of final records for all successfully detected clusters.
 93 | 
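 94 | To work with your own figure, save it as a PNG file with a lowercase `.png` extension (the scripts select input files by the substring `png` in the file name) and put it into images/.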
 95 | 
 96 | *Regarding the TFOD API part, thanks to https://github.com/TannerGilbert/Tensorflow-Object-Detection-API-Train-Model*
 97 | 
 98 | # Citation
 99 | 
100 | If you find this tool or any of its parts useful, please cite our paper https://www.nature.com/articles/s41597-022-01699-3
101 | You can also find more details regarding the usage of the algorithm there.
102 | 


--------------------------------------------------------------------------------
/final_record_func.py:
--------------------------------------------------------------------------------
  1 | import easyocr
  2 | import cv2
  3 | from easyocr import Reader
  4 | import matplotlib.pyplot as plt
  5 | import json
  6 | import numpy as np
  7 | import pandas as pd
  8 | import scipy
  9 | from scipy import stats
 10 | import math
 11 | import re
 12 | from os import walk
 13 | import os
 14 | '''
 15 | def rgb_to_hex(color):
 16 |     [r,g,b] = color
 17 |     rgb = (int(np.round(255*r)), int(np.round(255*g)), int(np.round(255*b)))
 18 |     return '#%02x%02x%02x' % rgb
 19 | '''
 20 | def get_scaling(axis_json, axis_img, reader, allowlist, axis):
 21 |     box = axis_json['detected_box'][0]
 22 |     width = box['xmax']-box['xmin']
 23 |     height = box['ymax']-box['ymin']
 24 |     axis_img = cv2.copyMakeBorder(axis_img, 10, 10, 10, 10, cv2.BORDER_CONSTANT,value=(255,255,255))
 25 |     results = reader.readtext(axis_img, min_size = 10, mag_ratio=2, allowlist=allowlist)
 26 |     a = clean_array(results, axis)
 27 |     [a1, confidence] = well_approximated(a, axis)
 28 |     return [confidence, a1, box]
 29 | 
 30 | def clean_array(results, axis):
 31 |     a = np.array([])
 32 |     record = []
 33 |     m_height_box = 10
 34 |     m_width_box = 5
 35 |     for (bbox, text, prob) in results:
 36 |         if prob < 0.5:
 37 |             continue
 38 |         if text =='' or text == '.' or text == '-':
 39 |             continue
 40 |         else:
 41 |             try:
 42 |                 float(text)
 43 |             except ValueError:
 44 |                 continue
 45 |             (tl, tr, br, bl) = bbox
 46 |             width_box = br[0]-tl[0]
 47 |             height_box = br[1]-tl[1]
 48 |             y = tl[1]+int(height_box/2)
 49 |             x = tl[0]+int(width_box/2)
 50 |             record.append((x,y,float(text)))
 51 |             m_height_box = min(height_box, 10)
 52 |             m_width_box = min(width_box,5)
 53 |     #print(record)
 54 |     if len(record)>0:
 55 |         dtype = [('x', int), ('y', int), ('value', float)]
 56 |         a = np.array(record, dtype=dtype)
 57 |         a = np.sort(a, order=axis)
 58 |         axis2 = 'xy'.replace(axis,'')
 59 |         param_box = {'x':max(int(m_height_box/2),6), 'y':max(int(m_width_box/2),3)}
 60 |         if stats.mode(a[axis2])[1]>1:
 61 |             A_bool = [[math.isclose(i,j, abs_tol=param_box[axis]) for i in a[axis2]] for j in a[axis2]]
 62 |             rows = [stats.mode(j)[0][0] for j in A_bool]
 63 |             a = a[rows]
 64 |     return a
 65 | 
 66 | def lin_approx(a,axis):
 67 |     linear_model=np.polyfit(a[axis],a['value'],1)
 68 |     linear_model_fn=np.poly1d(linear_model)
 69 |     return linear_model_fn(a[axis])
 70 | 
 71 | # want error of approximation for every dot to be less than 5%
 72 | def big_rel_error(arr1, arr2, threshold = 0.05):
 73 |     err = np.absolute(1 - arr1/arr2) > threshold
 74 |     return err.any()
 75 | 
 76 | def drop_outlier(a,axis):
 77 |     err = lin_approx(a,axis) - a['value']
 78 |     a1 =  a[err**2 < max(err**2)]
 79 |     return a1
 80 | 
 81 | def well_approximated(a, axis):
 82 |     a1=a
 83 |     while True:
 84 |         if len(a1)>2:
 85 |             #print('long enough')
 86 |             if big_rel_error(lin_approx(a1,axis),a1['value']):
 87 |                 a1 = drop_outlier(a1,axis)
 88 |                 #print('drop')
 89 |             else:
 90 |                 confidence = 'confident'
 91 |                 break
 92 |         else:
 93 |             confidence = 'unconfident'
 94 |             break
 95 |     return [a1, confidence]
 96 | 
 97 | def intersect(x1,v1,x2,v2):
 98 |     return v1-x1*(v2-v1)/(x2-x1)
 99 | 
100 | def axis_rec(axis_arr, a1, box, axis):
101 |     pix1 = a1[len(a1)//2-1][axis]
102 |     val1 = a1[len(a1)//2-1]['value']
103 |     pix2 = a1[len(a1)//2][axis]
104 |     val2 = a1[len(a1)//2]['value']
105 |     starting_pix = box[axis+'min']
106 |     return (val2-val1)/(pix2-pix1)*axis_arr + intersect(pix1+starting_pix,val1,pix2+starting_pix,val2)
107 | 
def recalc(x_a1,x_box,y_a1,y_box,cluster):
    """Rescale a cluster from pixel coordinates to axis values.

    ``cluster['coordinates']`` is read as rows of (x, y) pixels; each column is
    converted with ``axis_rec`` and the resulting points are returned as a list
    of ``[x, y]`` floats ordered by x.
    """
    pixels = np.array(cluster['coordinates'])
    X_rec = axis_rec(pixels[:, 0], x_a1, x_box, 'x')
    Y_rec = axis_rec(pixels[:, 1], y_a1, y_box, 'y')
    points = np.column_stack((X_rec, Y_rec)).astype(float)
    order = points[:, 0].argsort()
    return points[order].tolist()
119 | 
def save_json(name, cluster_name, cluster, x_box, x_a1, y_box, y_a1):
    """Write the rescaled cluster as a JSON record and a PNG plot into images/.

    The output name is ``cluster_name`` without its last five characters
    followed by ``final_record.json``; the plot uses the same name with a
    ``png`` extension.
    """
    stem = cluster_name[:-5]
    record = {
        'file_name': name,
        'cluster_name': stem,
        'axes_units': "na",
        'color': cluster['color'],
        'data': recalc(x_a1, x_box, y_a1, y_box, cluster),
    }
    json_path = os.path.join('images', stem + 'final_record.json')
    with open(json_path, 'w') as outfile:
        json.dump(record, outfile)

    curve = np.array(record['data'])
    fig = plt.figure(figsize=(8, 6))
    plt.plot(curve[:, 0], curve[:, 1], color=record['color'])
    fig.savefig(json_path[:-4] + 'png')
    # Close every figure so repeated calls do not accumulate open figures.
    plt.close('all')
137 | 


--------------------------------------------------------------------------------
/object_detection_axes_legend.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import os
  3 | import six.moves.urllib as urllib
  4 | import sys
  5 | import tarfile
  6 | import tensorflow as tf
  7 | import zipfile
  8 | import json
  9 | from os import walk
 10 | import glob
 11 | 
 12 | from distutils.version import StrictVersion
 13 | from collections import defaultdict
 14 | from io import StringIO
 15 | from matplotlib import pyplot as plt
 16 | from PIL import Image
 17 | 
 18 | sys.path.append("..")
 19 | 
 20 | from utils import label_map_util_v2
 21 | from utils import visualization_utils as vis_util
 22 | 
 23 | from utils import ops as utils_ops
 24 | 
 25 | def load_image_into_numpy_array(image):
 26 |     (im_width, im_height) = image.size
 27 |     return np.array(image.convert("RGB").getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)
 28 | 
 29 | 
 30 | def run_inference_for_single_image(image, graph):
 31 |     with graph.as_default():
 32 |         with tf.compat.v1.Session() as sess:
 33 |             # Get handles to input and output tensors
 34 |             ops = tf.compat.v1.get_default_graph().get_operations()
 35 |             all_tensor_names = {output.name for op in ops for output in op.outputs}
 36 |             tensor_dict = {}
 37 |             for key in ['num_detections', 'detection_boxes', 'detection_scores','detection_classes', 'detection_masks']:
 38 |                 tensor_name = key + ':0'
 39 |                 if tensor_name in all_tensor_names:
 40 |                     tensor_dict[key] = tf.compat.v1.get_default_graph().get_tensor_by_name(tensor_name)
 41 |             if 'detection_masks' in tensor_dict:
 42 |                 # The following processing is only for single image
 43 |                 detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
 44 |                 detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
 45 |                 # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
 46 |                 real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
 47 |                 detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
 48 |                 detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
 49 |                 detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(detection_masks, detection_boxes, image.shape[0], image.shape[1])
 50 |                 detection_masks_reframed = tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8)
 51 |                 # Follow the convention by adding back the batch dimension
 52 |                 tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0)
 53 |             image_tensor = tf.compat.v1.get_default_graph().get_tensor_by_name('image_tensor:0')
 54 |             # Run inference
 55 |             output_dict = sess.run(tensor_dict,feed_dict={image_tensor: np.expand_dims(image, 0)})
 56 |             # all outputs are float32 numpy arrays, so convert types as appropriate
 57 |             output_dict['num_detections'] = int(output_dict['num_detections'][0])
 58 |             output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.uint8)
 59 |             output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
 60 |             output_dict['detection_scores'] = output_dict['detection_scores'][0]
 61 |             if 'detection_masks' in output_dict:
 62 |                 output_dict['detection_masks'] = output_dict['detection_masks'][0]
 63 |     return output_dict
 64 | 
 65 | 
 66 | def get_image_boxes_and_write_json(image_path,im_width, im_height,image, boxes, classes, scores, category_index, use_normalized_coordinates=True,max_boxes_to_draw=20,min_score_thresh=.5):
 67 |     for i in range(min(max_boxes_to_draw, boxes.shape[0])):
 68 |         if scores is None or scores[i] > min_score_thresh:
 69 |             box = tuple(boxes[i].tolist())
 70 |             ymin, xmin, ymax, xmax = box
 71 |             if use_normalized_coordinates:
 72 |                 (left, right, top, bottom) = (int(xmin * im_width), int(xmax * im_width), int(ymin * im_height), int(ymax * im_height))
 73 |             else:
 74 |                 (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
 75 |             if classes[i] in category_index.keys():
 76 |                 class_name = category_index[classes[i]]['name']
 77 |             else:
 78 |                 class_name = 'NA'
 79 |             ending_string = '_'+str(class_name)+'_'+str(i)
 80 |             output_image_path = image_path[:-4]+ending_string+'.png'
 81 |             json_record = {}
 82 |             json_record['image_name'] = image_path
 83 |             json_record['image_size'] = []
 84 |             json_record['image_size'].append({'height':im_height, 'width':im_width})
 85 |             json_record['detected_box'] = []
 86 |             json_record['detected_box'].append({'box_image_name':output_image_path, 'label':str(class_name), 'score':int(100*scores[i]), 'xmin':left, 'xmax':right, 'ymin':top, 'ymax':bottom})
 87 |             subimage = Image.fromarray(image[top:bottom,left:right,:])
 88 |             subimage.save(output_image_path)
 89 |             output_json_path = output_image_path[:-4]+'.json'
 90 |             with open(output_json_path,'w') as outfile:
 91 |                 json.dump(json_record,outfile)
 92 | 
 93 | 
 94 | if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
 95 |   raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')
 96 | 
 97 | # What model to download.
 98 | MODEL_NAME = 'inference_graph'
 99 | 
100 | # Path to frozen detection graph. This is the actual model that is used for the object detection.
101 | PATH_TO_FROZEN_GRAPH = os.path.join(MODEL_NAME,'frozen_inference_graph.pb')
102 | 
103 | # List of the strings that is used to add correct label for each box.
104 | PATH_TO_LABELS = os.path.join('training','labelmap.pbtxt')
105 | 
106 | PATH_TO_SAMPLE_IMAGES_DIR = 'images'
107 | SAMPLE_IMAGE_PATHS = []
108 | for (dirpath, dirnames, filenames) in walk(PATH_TO_SAMPLE_IMAGES_DIR):
109 |     print(filenames)
110 |     SAMPLE_IMAGE_PATHS.extend([filename for filename in filenames if 'png' in filename])
111 |     break
112 | print(SAMPLE_IMAGE_PATHS)
113 | 
114 | detection_graph = tf.Graph()
115 | with detection_graph.as_default():
116 |   od_graph_def = tf.compat.v1.GraphDef()
117 |   with tf.compat.v2.io.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
118 |     serialized_graph = fid.read()
119 |     od_graph_def.ParseFromString(serialized_graph)
120 |     tf.import_graph_def(od_graph_def, name='')
121 | 
122 | category_index = label_map_util_v2.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
123 | 
124 | for ind, filename in enumerate(SAMPLE_IMAGE_PATHS):
125 |     image_path = os.path.join(PATH_TO_SAMPLE_IMAGES_DIR,filename)
126 |     image = Image.open(image_path)
127 |     im_width, im_height = image.size
128 |     # the array based representation of the image will be used later in order to prepare the
129 |     # result image with boxes and labels on it.
130 |     image_np = load_image_into_numpy_array(image)
131 |     # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
132 |     image_np_expanded = np.expand_dims(image_np, axis=0)
133 |     # Actual detection.
134 |     output_dict = run_inference_for_single_image(image_np, detection_graph)
135 |     #Extraction and writing json
136 |     get_image_boxes_and_write_json(image_path,im_width, im_height,image_np, output_dict['detection_boxes'],output_dict['detection_classes'],output_dict['detection_scores'], category_index, use_normalized_coordinates=True)
137 | 
138 | 


--------------------------------------------------------------------------------
/posterization.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | from matplotlib.image import imread
  3 | import matplotlib.pyplot as plt
  4 | import numpy as np
  5 | from collections import Counter
  6 | import json
  7 | import cv2
  8 | from PIL import Image, ImageDraw
  9 | from PIL.Image import Image as Image_type
 10 | 
 11 | def read_image(image):
 12 |     if type(image) is not Image_type:
 13 |         img = Image.open(image)
 14 |     else:
 15 |         img = image
 16 |     if img.mode != 'RGBA':
 17 |         img = img.convert('RGBA')
 18 | 
 19 |     im = np.asarray(img)
 20 |     im = im[:, :, :3]
 21 |     return im
 22 | 
 23 | def remove_legend(img, legend_filename):
 24 |     """given image and path to legend json record; open json record, find box with coordinates, return image without legend + 5 pixels padding"""
 25 | 
 26 |     f = open(legend_filename)
 27 |     data = json.load(f)
 28 |     legend_box = data['detected_box'][0]
 29 | 
 30 |     return cv2.rectangle(img, (legend_box['xmin']-5,legend_box['ymin']-5), (legend_box['xmax']+5,legend_box['ymax']+5), (255, 255, 255), -1)
 31 | 
 32 | def preprocess(image_filename, legend_filenames):
 33 |     """cuts legend and all b/w objects"""
 34 |     img0 = read_image(image_filename)
 35 |     hsv0 = cv2.cvtColor(img0,cv2.COLOR_RGB2HSV)
 36 |     hsv = hsv0
 37 |     for i in range(len(hsv0)):
 38 |         for j in range(len(hsv0[0])):
 39 |             if hsv0[i,j,1] < 124 or hsv0[i,j,2] < 124:
 40 |                 hsv[i,j] = [0, 0 , 255]
 41 |     img = cv2.cvtColor(hsv,cv2.COLOR_HSV2RGB)
 42 |     if len(legend_filenames)>0:
 43 |         for legend_filename in legend_filenames:
 44 |             img = remove_legend(img, legend_filename)
 45 |     return img
 46 | 
 47 | def hex2rgb(hex_code):
 48 |     h = hex_code.lstrip('#')
 49 |     return tuple(int(h[i:i+2], 16)/255 for i in (0, 2, 4))
 50 | 
 51 | def rgb2hex(color):
 52 |     [r,g,b] = color
 53 |     rgb = (int(np.round(255*r)), int(np.round(255*g)), int(np.round(255*b)))
 54 |     return '#%02x%02x%02x' % rgb
 55 | 
 56 | def squared_distance(p, q):
 57 |     """given points p and q returns the sum of the squares"""   
 58 |     return sum((p_i - q_i) ** 2 for p_i, q_i in zip(p, q))
 59 | 
 60 | def closest_index(pixels,  colors):
 61 |     """given a pixel point and a list of color points, returns the index of the closest color point"""
 62 |     return min(range(len(colors)), key=lambda i: squared_distance(pixels, colors[i]))
 63 | 
 64 | def get_matrix(image_array, colorsrgb):
 65 |     im = image_array[:, :, :3]/255
 66 |     return [[closest_index(pixel, colorsrgb) for pixel in row] for row in im]
 67 | 
 68 | def get_cluster_data(matrix,index):
 69 |     """returns list of (x,y) pixel coordinates of pixels associated with this cluster"""
 70 |     return [(j,i) for i,_ in enumerate(matrix) for j,_ in enumerate(matrix[i]) if matrix[i][j]==index]
 71 | 
 72 | def mean(ps):
 73 |     """given a list of points, check if the cluster is emply, return the single point
 74 |     whose first element is the average of all the first elements,
 75 |     whose second element is the average of all the second elements, and so on"""   
 76 |     n = len(ps)
 77 |     if n > 0:
 78 |         k = len(ps[0])
 79 |         mean = [sum(p[i] for p in ps) / n for i in range(k)]
 80 |     else:
 81 |         mean = None
 82 |     return mean
 83 | 
 84 | def average_distance(means, new_mean_set, drop_out):
 85 |     """drop out the cluster if needed, calculate the average distance in RGB space between cluster centers"""
 86 |     if len(drop_out)>0:
 87 |         means = [means[idx] for idx in range(len(means)) if idx not in drop_out]
 88 |     distances = [squared_distance(means[i],new_mean_set[i]) for i in range(len(means))]
 89 |     return np.mean(np.array([(d)**0.5 for d in distances]))
 90 | 
 91 | def new_means(ps, old_means):
 92 |     """given a list of points and some cluster means,
 93 |     assign each point to its closest cluster,
 94 |     and then compute the means of the new clusters and indexes of dropped clusters"""
 95 |     positions = list(range(len(old_means)))
 96 |     indexes = [closest_index(p, old_means) for p in ps]
 97 |     return [mean([p for p, i in zip(ps, indexes) if i == j]) for j in Counter(indexes).keys()], [idx for idx in positions if idx not in Counter(indexes).keys()]
 98 | 
 99 | def k_means(ps, num_iterations, convergence):
100 |     """given a list of points, start with basic 8 color palette,
101 |     then compute new_means num_iteration times or until convergence criteria is reached,
102 |     returning the final means and convergence path"""
103 |     #white, red, green, blue, yellow, purple, cyan, black
104 |     means = [[0.99, 0.99, 0.99], [0.99, 0.01, 0.01], [0.01, 0.99, 0.01], [0.01, 0.01, 0.99], [0.99, 0.99, 0.01], [0.99, 0.01, 0.99], [0.01, 0.99, 0.99], [0.01, 0.01, 0.01]]
105 |     track_convergence = []
106 |     for i in range(num_iterations):
107 |         drop_out = []
108 |         new_mean_set, drop_out = new_means(ps, means)
109 |         #print (i, new_mean_set)
110 |         track_convergence.append(average_distance(means,new_mean_set,drop_out))
111 |         if len(track_convergence)>2 and track_convergence[-1] < convergence and track_convergence[-2] < convergence:
112 |             break
113 |         else:
114 |             means = new_mean_set
115 |     return new_mean_set
116 | 
117 | 
def detect_colors(image_array,num_iterations = 12, convergence = 0.05):
    """Return the image palette as the list of cluster centres found by k_means.

    The first three channels of ``image_array`` are divided by 255 and every
    pixel is passed to ``k_means`` as one point.
    """
    im = image_array[:, :, :3]/255
    flattened = []
    for row in im:
        flattened.extend(row)
    return k_means(flattened, num_iterations, convergence)
124 | 
def save_cluster(cluster, index, colorhex, img_filename):
    """Save a scatter plot of one cluster next to the source image.

    The output name is ``img_filename`` without its last four characters
    followed by ``_colorcut_cluster_<index>.png``. The y axis is inverted and
    the axes are hidden.
    """
    points = np.array(cluster)
    out_name = img_filename[:-4] + '_colorcut_cluster_' + str(index) + '.png'

    fig = plt.figure()
    plt.scatter(points[:, 0], points[:, 1], s=0.01, c=colorhex)
    plt.gca().invert_yaxis()
    plt.axis('off')
    fig.savefig(out_name)
    # Release the figure so repeated calls do not pile up open figures.
    plt.cla()
    plt.clf()
    plt.close()
136 | 
def save_json(cluster,color,index,img_filename):
    """Write the cluster coordinates and its colour to a JSON file.

    The file is named ``img_filename`` without its last four characters
    followed by ``_colorcut_cluster_<index>.json`` and holds the keys
    ``color`` and ``coordinates``.
    """
    out_name = img_filename[:-4] + '_colorcut_cluster_' + str(index) + '.json'
    record = {'color': color, 'coordinates': cluster}
    with open(out_name, 'w') as outfile:
        json.dump(record, outfile)
144 | 
def save_palette(pixels, img_filename):
    """Save ``pixels`` as an image named ``<img_filename minus 4 chars>_colorcut_palette.png``."""
    out_name = img_filename[:-4] + '_colorcut_palette.png'
    swatch = np.array(pixels)
    plt.imsave(arr=swatch, fname=out_name)
    # Reset pyplot state after writing.
    plt.cla()
    plt.clf()
    plt.close()
151 | 
def ygroups(ys):
    """Summarise the y pixels found in one x column.

    ``ys`` is compared with the consecutive run starting at ``ys[0]``. When
    the offsets sum to zero the result is ``[int(mean of ys), '']``; otherwise
    the values are grouped by their offset from that run and the result is
    ``[[int(mean) per group], 'multiple']``.
    """
    ys_ref = range(ys[0], ys[0] + len(ys))
    offsets = np.array(ys) - ys_ref
    if sum(offsets) == 0:
        return [int(np.mean(ys)), '']

    dif = list(offsets)
    ys_res = []
    for cl in list(set(dif)):
        members = [y for y, offset in zip(ys, dif) if offset == cl]
        ys_res.append(int(np.mean(members)))
    return [ys_res, 'multiple']
163 | 
def data_structured(cluster):
    """Describe a cluster column by column.

    Parameters:
        cluster: sequence of (x, y) pixel positions.

    Returns:
        One dict per integer x from the smallest to the largest x in the
        cluster, with keys ``'x'``, ``'y'`` (result of ``ygroups`` for the
        sorted y values of that column, or ``[]``) and ``'c'`` (``'gap'`` for
        a column without pixels, otherwise the comment from ``ygroups``).
        An empty cluster gives an empty list.
    """
    # Group the y values by x in a single pass instead of rescanning the whole
    # cluster for every column.
    columns = {}
    for item in cluster:
        columns.setdefault(item[0], []).append(item[1])
    if not columns:
        return []

    structured = []
    for ind in range(min(columns), max(columns) + 1):
        ys = sorted(columns.get(ind, []))
        if len(ys) == 0:
            ys_res, comment = [], 'gap'
        else:
            [ys_res, comment] = ygroups(ys)
        structured.append({'x': ind, 'y': ys_res, 'c': comment})
    return structured
177 | 
def data_score_mult(cluster):
    """Return the share of non-gap columns that hold a single run of pixels.

    Computed as ``1 - multiples / (columns - gaps)`` over the entries of
    ``data_structured(cluster)``.
    """
    mults = 0
    gaps = 0
    tot = 0
    for entry in data_structured(cluster):
        tot += 1
        if entry['c'] == 'multiple':
            mults += 1
        elif entry['c'] == 'gap':
            gaps += 1
    return 1 - mults / (tot - gaps)
184 | 


--------------------------------------------------------------------------------