├── data
│   ├── l_weights
│   │   └── .gitignore
│   └── test_imgs
│       ├── sample_input
│       │   └── .gitignore
│       └── sample_output
│           └── .gitignore
├── requirements.txt
├── .gitignore
├── model
│   ├── image_handler.py
│   ├── loss.py
│   ├── wmts_handler.py
│   ├── iiif_handler.py
│   ├── save_localheight_original_txt_fastzk.py
│   ├── predict_annotations.py
│   └── mymodel.py
├── Dockerfile
└── README.md

--------------------------------------------------------------------------------
/data/l_weights/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore

--------------------------------------------------------------------------------
/data/test_imgs/sample_input/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore

--------------------------------------------------------------------------------
/data/test_imgs/sample_output/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | xmltodict
2 | requests
3 | shapely
4 | rasterio

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .idea
3 | model/debug.py
4 | model/debug.txt

--------------------------------------------------------------------------------
/model/image_handler.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | import rasterio
 4 | 
 5 | 
 6 | class ImageHandler:
 7 |     """Stub handler for plain image files; predict_annotations.py currently reads --src paths directly."""
 8 |     def __init__(self):
 9 |         self.img = None
10 |         print("ImageHandler")
11 | 
12 |     def process_img(self, args):
13 |         print(f"ImageHandler: {args}")
14 | 

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM zekunli/zekun-keras-gpu
 2 | 
 3 | WORKDIR /map-kurator
 4 | 
 5 | # Install GDAL for Rasterio (assumes add-apt-repository is available in the base image)
 6 | RUN add-apt-repository -y ppa:ubuntugis/ppa \
 7 |     && apt-get update -y \
 8 |     && apt-get install -y python-numpy gdal-bin libgdal-dev
 9 | 
10 | COPY requirements.txt requirements.txt
11 | 
12 | RUN pip3 install -r requirements.txt

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # map-kurator
 2 | Wrapper around Zekun's model to detect map text labels.
 3 | 
 4 | [**UPDATE 2022/10**]: A more comprehensive pipeline that performs detection, recognition, image-to-geocoordinate conversion, and post-OCR is available here: https://github.com/knowledge-computing/mapkurator-system
 5 | 
 6 | 
 7 | ## Installation
 8 | ### 1. Installing Docker
 9 | If the machine doesn't have Docker installed, you can follow the instructions (e.g., for Ubuntu) here: https://docs.docker.com/engine/install/ubuntu/
10 | 
11 | In particular, here are the commands I ran to install Docker on an Azure VM:
12 | ```shell
13 | # 1. Install prerequisites
14 | sudo apt-get update
15 | 
16 | sudo apt-get install -y \
17 |     apt-transport-https \
18 |     ca-certificates \
19 |     curl \
20 |     gnupg \
21 |     lsb-release
22 | 
23 | 
24 | # 2. Add Docker’s official GPG key:
25 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
26 | 
27 | # 3. Set up the repo
28 | echo \
29 |   "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
30 |   $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
31 | 
32 | # 4. Install Docker
33 | sudo apt-get update
34 | sudo apt-get install -y docker-ce docker-ce-cli containerd.io
35 | 
36 | # 5. Verify that everything works
37 | sudo docker run hello-world
38 | 
39 | # 6. Add the mrm user to the docker group to allow running without sudo
40 | sudo usermod -a -G docker mrm
41 | ```
42 | 
43 | ### 2. Download map-kurator
44 | 
45 | 1. Clone this repository:
46 | ```
47 | git clone https://github.com/machines-reading-maps/map-kurator.git
48 | ```
49 | 2. `cd map-kurator/`
50 | 
51 | 3. Build the Docker image, if you haven't already:
52 | ```shell
53 | docker build -t map-kurator .
54 | ```
55 | This command builds the image from the `Dockerfile` in the current directory (`.`) and names the image `map-kurator`.
56 | 
57 | 4. **IMPORTANT**: make sure the file with the model weights is available:
58 | ```shell
59 | ls -lah data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5
60 | #> -rwxrwxr-x 1 danf danf 183M Jul 5 18:48 data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5
61 | ```
62 | This file is over GitHub's file-size limit, so you need to download it from [here](https://drive.google.com/file/d/1PW_wPZO54Cr5wPk44Uf8g5_gEN7UGReA/view?usp=sharing) and put it in the `data/l_weights` folder.
63 | 
64 | If you are trying to run map-kurator locally and you have access to the Turing VM (and the VM is running), you can download it to your machine:
65 | ```shell
66 | scp {USER}@{VM_HOST}:~/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5 data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5
67 | 
68 | ```
69 | 
70 | ## Usage
71 | 
72 | ### Input
73 | 
74 | #### WMTS
75 | 
76 | ```shell
77 | docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --workdir=/map-kurator map-kurator python model/predict_annotations.py wmts --url='https://wmts.maptiler.com/aHR0cDovL3dtdHMubWFwdGlsZXIuY29tL2FIUjBjSE02THk5dFlYQnpaWEpwWlhNdGRHbHNaWE5sZEhNdWN6TXVZVzFoZW05dVlYZHpMbU52YlM4eU5WOXBibU5vTDNsdmNtdHphR2x5WlM5dFpYUmhaR0YwWVM1cWMyOXUvanNvbg/wmts' --boundary='{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-1.1248,53.9711],[-1.0592,53.9711],[-1.0592,53.9569],[-1.1248,53.9569],[-1.1248,53.9711]]]}}' --zoom=16 --dst=data/test_imgs/sample_output/ --filename=sample_filename
78 | ```
79 | 
80 | For WMTS, you can also choose to return the predicted polygons in the EPSG:4326 coordinate system (lat, lng) by adding `--coord epsg4326` at the end of the above command.
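81 | 
82 | For example, a sketch of the same invocation (volume mounts and the long `--url`/`--boundary` arguments elided here) with the flag appended:
83 | ```shell
84 | docker run -it ... map-kurator python model/predict_annotations.py wmts --url='...' --boundary='...' --zoom=16 --dst=data/test_imgs/sample_output/ --filename=sample_filename --coord epsg4326
85 | ```
86 | The polygon points in the resulting annotations are then written as (lng, lat) pairs, formatted to six decimal places.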
 87 | 
 88 | #### IIIF
 89 | 
 90 | ```shell
 91 | docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --workdir=/map-kurator map-kurator python model/predict_annotations.py iiif --url='https://map-view.nls.uk/iiif/2/12563%2F125635459/info.json' --dst=data/test_imgs/sample_output/ --filename=sample_filename
 92 | ```
 93 | 
 94 | #### Regular File
 95 | ```shell
 96 | docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --workdir=/map-kurator map-kurator python model/predict_annotations.py file --src={PATH_TO_INPUT_FILE} --dst=data/test_imgs/sample_output/ --filename=sample_filename
 97 | ```
 98 | 
 99 | ### Output
100 | 
101 | Assuming the output directory is `--dst=$OUT_DIR` and (optionally) `--filename=my_filename`, if any of the above commands ran successfully, `$OUT_DIR` will contain the following files:
102 | 
103 | - `my_filename_stitched.jpg`: the stitched image that was passed to the model
104 | 
105 | - `my_filename_predictions.jpg`: the text regions detected by the model
106 | 
107 | - `my_filename_annotations.json`: the detected text-region outlines represented as polygons (using the [Web Annotation](https://www.w3.org/TR/annotation-model/) format)
108 | 
109 | If `--filename` is not provided, it is generated automatically as a unique `uuid4()` string.
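110 | 
111 | For reference, each entry in the annotations file is a Web Annotation whose target selector carries the detected polygon as inline SVG. A minimal sketch of one entry (coordinates are illustrative):
112 | ```json
113 | [
114 |   {
115 |     "@context": "http://www.w3.org/ns/anno.jsonld",
116 |     "id": "",
117 |     "target": {
118 |       "selector": [
119 |         {
120 |           "type": "SvgSelector",
121 |           "value": "<svg><polygon points=\"120,48 180,48 180,72 120,72\" /></svg>"
122 |         }
123 |       ]
124 |     }
125 |   }
126 | ]
127 | ```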
--------------------------------------------------------------------------------
/model/loss.py:
--------------------------------------------------------------------------------
 1 | from keras import backend as K
 2 | 
 3 | # https://gist.github.com/wassname/ce364fddfc8a025bfab4348cf5de852d
 4 | def weighted_categorical_crossentropy(weights):
 5 |     """
 6 |     A weighted version of keras.objectives.categorical_crossentropy
 7 | 
 8 |     Variables:
 9 |         weights: numpy array of shape (C,) where C is the number of classes
10 | 
11 |     Usage:
12 |         weights = np.array([0.5, 2, 10]) # class 1 at 0.5x the normal weight, class 2 at 2x, class 3 at 10x
13 |         loss = weighted_categorical_crossentropy(weights)
14 |         model.compile(loss=loss, optimizer='adam')
15 |     """
16 | 
17 |     weights = K.variable(weights)
18 | 
19 |     def loss(y_true, y_pred):
20 |         # scale predictions so that the class probas of each sample sum to 1
21 |         y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
22 |         # clip to prevent NaN's and Inf's
23 |         y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
24 |         # weighted cross-entropy, summed over classes
25 |         loss = y_true * K.log(y_pred) * weights
26 |         loss = -K.sum(loss, -1)
27 |         return loss
28 | 
29 |     return loss
30 | 
31 | def mean_squared_error_mask(y_true, y_pred):
32 |     y_mask = y_true[:, :, :, 0]  # y_true: (bsize, h, w, 5) = (mask, x, y, sin, cos)
33 |     y_mask = K.expand_dims(y_mask, axis=-1)
34 |     # y_pred carries only the 4 regression channels, so drop the mask channel from y_true
35 |     y_true = y_true[:, :, :, 1:]
36 |     # y_pred = y_pred[:, :, :, 1:]
37 |     return K.sum(K.square((y_pred - y_true) * y_mask), axis=-1)  # masked squared error, summed over channels, at each pixel location
38 | 
39 | def mean_absolute_error_mask(y_true, y_pred):
40 |     y_mask = y_true[:, :, :, 0]  # y_true: (bsize, h, w, 5) = (mask, x, y, sin, cos)
41 |     y_mask = K.expand_dims(y_mask, axis=-1)
42 |     # print(y_true.shape, y_pred.shape)
43 |     y_true = y_true[:, :, :, 1:]
44 | 
45 |     return K.sum(K.abs((y_pred - y_true) * y_mask), axis=-1)
46 | 
47 | 
48 | def mean_absolute_percentage_error_mask(y_true, y_pred):
49 |     y_mask = y_true[:, :, :, 0]  # y_true: (bsize, h, w, 5) = (mask, x, y, sin, cos)
50 |     y_mask = K.expand_dims(y_mask, axis=-1)
51 |     # print(y_true.shape, y_pred.shape)
52 |     y_true = y_true[:, :, :, 1:]
53 | 
54 |     diff = K.abs(((y_true - y_pred)) * y_mask / K.clip(K.abs(y_true * y_mask),
55 |                                                        K.epsilon(),
56 |                                                        None))
57 |     return 100. * K.sum(diff, axis=-1)
58 | 

--------------------------------------------------------------------------------
/model/wmts_handler.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import xmltodict
 3 | import json
 4 | import shapely.geometry
 5 | import math
 6 | import cv2
 7 | import numpy as np
 8 | import os
 9 | 
10 | 
11 | class WMTSHandler:
12 |     def __init__(self, url, bounds, zoom, output_dir, img_filename):
13 |         self.url = url
14 |         self.tile_info = {}
15 |         self.bounds = json.loads(bounds)
16 |         self.zoom = zoom
17 |         self.output_dir = output_dir
18 |         self.img_filename = img_filename
19 | 
20 |     def _tile_idxs_in_poly(self, poly: shapely.geometry.Polygon):
21 |         min_lon, min_lat, max_lon, max_lat = poly.bounds
22 |         (min_x, max_y), (max_x, min_y) = self._latlon2tile(min_lat, min_lon), self._latlon2tile(max_lat, max_lon)
23 | 
24 |         tile_idxs = []
25 | 
26 |         for x in range(int(min_x), int(max_x) + 1):
27 |             for y in range(int(min_y), int(max_y) + 1):
28 |                 nw_pt = self._tile2latlon(x, y)[::-1]  # poly is defined in geojson form
29 |                 ne_pt = self._tile2latlon(x + 1, y)[::-1]  # poly is defined in geojson form
30 |                 sw_pt = self._tile2latlon(x, y + 1)[::-1]  # poly is defined in geojson form
31 |                 se_pt = self._tile2latlon(x + 1, y + 1)[::-1]  # poly is defined in geojson form
32 | 
33 |                 bbox = shapely.geometry.Polygon([nw_pt, ne_pt, se_pt, sw_pt])  # corner order nw -> ne -> se -> sw keeps the ring valid (non-self-intersecting)
34 | 
35 |                 # print(f"{x}-{y}; {nw_pt} {ne_pt} {sw_pt} {se_pt}")
36 |                 # if any(map(lambda pt: shapely.geometry.Point(pt).within(poly), (nw_pt, ne_pt, sw_pt, se_pt))):
37 |                 if poly.intersects(bbox):
38 |                     tile_idxs.append((x, y))
39 | 
40 |         return tile_idxs, int(max_x + 1) - int(min_x), int(max_y + 1) - int(min_y), int(min_x), int(min_y)
41 | 
42 |     def _generate_tile_info(self, tile_idxs, min_x, min_y, url_template):
43 |         zoom_level = str(self.zoom)
44 |         tile_info = {
45 |             'zoom_level': zoom_level,
46 |             'tile_idxs': {}
47 |         }
48 | 
49 |         for (x, y) in tile_idxs:
50 |             # tile_col = str(x)
51 |             # tile_row = str(y)
52 | 
53 |             url = url_template.replace('{TileMatrix}', zoom_level).replace('{TileCol}', str(x)).replace('{TileRow}',
54 |                                                                                                         str(y))
55 |             tile_info['tile_idxs'][(x - min_x, y - min_y)] = {'url': url}
56 | 
57 |         return tile_info
58 | 
59 |     def process_wmts(self):
60 |         # print(args)
61 |         # zoom_level = 18  # ~45min to download and predict; similar results to zoom=16; stitched png ~100Mb
62 |         # zoom_level = 16  # ~2 min to download and predict; decent results; stitched png ~7Mb
63 |         # zoom_level = 14  # too small for the model to detect text
64 | 
65 |         r = requests.get(self.url)
66 |         # print(r.status_code)
67 |         # print(str(r.headers))
68 |         # print(json.dumps(xmltodict.parse(r.content)))
69 |         response_dict = xmltodict.parse(r.content)
70 |         wmts_capabilities = response_dict['Capabilities']
71 |         # print(list(wmts_capabilities.keys()))
72 |         url_template = wmts_capabilities['Contents']['Layer']['ResourceURL']['@template']
73 | 
74 |         poly = shapely.geometry.shape(self.bounds['geometry'])
75 | 
76 |         tile_idxs, num_tiles_w, num_tiles_h, min_x, min_y = self._tile_idxs_in_poly(poly)
77 | 
78 |         # print(f"num_tiles: {len(tile_idxs)}")
79 |         tile_info = self._generate_tile_info(tile_idxs, min_x, min_y, url_template)
80 |         tile_info['num_tiles_w'] = num_tiles_w
81 |         tile_info['num_tiles_h'] = num_tiles_h
82 |         tile_info['min_x'] = min_x
83 |         tile_info['min_y'] = min_y
84 | 
85 |         tile_info = self._download_tiles(tile_info)
86 | 
87 |         map_path = self._generate_img(tile_info)
88 | 
89 |         # update self.tile_info
90 |         self.tile_info = tile_info
91 | 
 92 |         return map_path
 93 | 
 94 |     def _download_tiles(self, tile_info):
 95 | 
 96 |         for tile_idx in list(tile_info['tile_idxs'].keys()):
 97 |             url = tile_info['tile_idxs'][tile_idx]['url']
 98 | 
 99 |             print(f"downloading for key {str(tile_idx)} - {url}")
100 | 
101 |             resp = requests.get(url)
102 |             img = np.asarray(bytearray(resp.content), dtype=np.uint8)
103 |             img = cv2.imdecode(img, cv2.IMREAD_COLOR)
104 | 
105 |             tile_info['tile_idxs'][tile_idx]['img'] = img
106 | 
107 |         # return the images
108 |         return tile_info
109 | 
110 |     def _generate_img(self, tile_info):
111 |         num_tiles_w = tile_info['num_tiles_w']
112 |         num_tiles_h = tile_info['num_tiles_h']
113 | 
114 |         shift_size = 256
115 | 
116 |         enlarged_width = int(shift_size * num_tiles_w)
117 |         enlarged_height = int(shift_size * num_tiles_h)
118 | 
119 |         # allocate the stitched (enlarged) map
120 |         enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.uint8)
121 | 
122 |         # process tile by tile
123 |         for idx in range(0, max(1, num_tiles_w)):
124 |             # paste each downloaded tile into the stitched map
125 |             for jdx in range(0, max(1, num_tiles_h)):
126 |                 img = tile_info['tile_idxs'][(idx, jdx)]['img']
127 |                 enlarged_map[jdx * shift_size:(jdx + 1) * shift_size, idx * shift_size:(idx + 1) * shift_size, :] = img
128 | 
129 |         map_path = os.path.join(self.output_dir, self.img_filename)
130 | 
131 |         cv2.imwrite(map_path, enlarged_map)
132 |         return map_path
133 | 
134 |     def _stitch_tiles(self):
135 |         # needs input path with (cached) image tiles
136 |         # needs output path
137 |         return True
138 | 
139 |     # from OSM Slippy Tile definitions & https://github.com/Caged/tile-stitch
140 |     def _latlon2tile(self, lat, lon):
141 |         lat_radians = lat * math.pi / 180.0
142 |         n = 1 << self.zoom
143 |         return (
144 |             n * ((lon + 180.0) / 360.0),
145 |             n * (1 - (math.log(math.tan(lat_radians) + 1 / math.cos(lat_radians)) / math.pi)) / 2.0
146 |         )
147 | 
148 |     # from OSM Slippy Tile definitions & https://github.com/Caged/tile-stitch
149 |     def _tile2latlon(self, x, y):
150 |         n = 1 << self.zoom
151 |         lat_radians = math.atan(math.sinh(math.pi * (1.0 - 2.0 * y / n)))
152 |         lat = lat_radians * 180 / math.pi
153 |         lon = 360 * x / n - 180.0
154 |         return (lat, lon)
155 | 
156 |     def _tile2latlon_list(self, x_list, y_list):
157 |         n = 1 << self.zoom
158 |         x_list, y_list = np.array(x_list), np.array(y_list)
159 |         lat_radians_list = np.arctan(np.sinh(np.pi * (1.0 - 2.0 * y_list / n)))
160 |         lat_list = lat_radians_list * 180 / math.pi
161 |         lon_list = 360 * x_list / n - 180.0
162 |         return (lat_list, lon_list)
163 | 

--------------------------------------------------------------------------------
/model/iiif_handler.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import cv2
 3 | import numpy as np
 4 | import json
 5 | import pprint as pp
 6 | import math
 7 | from urllib.parse import urlparse, unquote
 8 | import os
 9 | import uuid
10 | 
11 | 
12 | class IIIFHandler:
13 |     def __init__(self, manifest_url, output_dir, img_filename):
14 |         self.tile_info = {'tile_idxs': {}, 'num_tiles_w': 0, 'num_tiles_h': 0}
15 |         # self.url = "https://map-view.nls.uk/iiif/2/12563%2F125635459/12288,8192,4096,3242/512,/0/default.jpg"
16 |         # self.url = "https://map-view.nls.uk/iiif/2/12563%2F125635459/full/max/0/default.jpg"
17 |         self.tile_width = None
18 |         self.tile_height = None
19 |         self.img_width = None
20 |         self.img_height = None
21 |         self.url_prefix = None
22 |         # self.manifest_url = "https://map-view.nls.uk/iiif/2/12563%2F125635459/info.json"
23 |         self.manifest_url = manifest_url
24 |         self.output_dir = output_dir
25 |         self.rotation = 0
26 |         self.tile_size = "full"
27 |         self.quality = "default"
28 |         self.img_format = "jpg"
29 |         self.img_filename = img_filename
30 | 
31 |     def process_url(self):
32 |         r = requests.get(self.manifest_url)
33 |         # print(r.status_code)
34 |         # print(str(r.headers))
35 | 
36 |         response_dict = r.json()
37 |         print(json.dumps(response_dict, indent=2))
38 | 
39 |         self.url_prefix = response_dict['@id']
40 |         # self.img_filename = unquote(urlparse(self.url_prefix).path).split("/")[-1]
41 | 
42 |         self.img_width = response_dict['width']
43 |         self.img_height = response_dict['height']
44 | 
45 | 
46 |         if response_dict['profile'] is not None:
47 |             profile_list = response_dict['profile']
48 |             if type(profile_list) == list and len(profile_list) > 1:
49 |                 profile_info = profile_list[1]
50 |                 if 'qualities' in profile_info:
51 |                     if 'native' in profile_info['qualities']:
52 |                         self.quality = 'native'
53 |                         print('set to native')
54 | 
55 |         if response_dict['tiles'] is not None:
56 |             #assert response_dict['tiles'][0]['width'] == response_dict['tiles'][0]['height']
57 |             #tile_size = response_dict['tiles'][0]['width']
58 |             #self.tile_size = str(tile_size) + ','
59 | 
60 |             tile_info = response_dict['tiles'][0]
61 |             self.tile_width = tile_info['width']
62 |             # hack for sanborn maps: some manifests omit the tile height
63 |             if 'height' in tile_info:
64 |                 self.tile_height = tile_info['height']
65 |             else:
66 |                 self.tile_height = tile_info['width']
67 | 
68 | 
69 |         assert self.tile_height == self.tile_width
70 | 
71 |         # hack for david rumsey maps
72 |         try:
73 |             # probe once to decide the url format
74 |             probe_bbox_str = ",".join([str(0), str(0), str(self.tile_width), str(self.tile_height)])
75 |             probe_url = self.url_prefix + f"/{probe_bbox_str}/{self.tile_size}/{self.rotation}/{self.quality}.{self.img_format}"
76 |             probe_resp = requests.get(probe_url)
77 |             probe_img = np.asarray(bytearray(probe_resp.content), dtype=np.uint8)
78 |             _, _, _ = cv2.imdecode(probe_img, cv2.IMREAD_COLOR).shape  # DO NOT delete this line: if the url format is incorrect, decoding fails and raises here, which triggers the except branch below
 79 |         except:
 80 | 
 81 |             self.tile_size = str(self.tile_height) + ','
 82 | 
 83 | 
 84 |         self._generate_tile_info()
 85 |         # pp.pprint(self.tile_info)
 86 |         self._download_tiles()
 87 |         map_path = self._generate_img()
 88 |         return map_path
 89 | 
 90 | 
 91 | 
 92 |     # generate a list of unique urls for each tile to download the entire image in pieces
 93 |     # https://iiif.io/api/image/2.1/#appendices
 94 |     def _generate_tile_info(self):
 95 |         row_idx = 0
 96 |         col_idx = 0
 97 | 
 98 |         max_col_idx = math.ceil(self.img_width / self.tile_width)
 99 |         max_row_idx = math.ceil(self.img_height / self.tile_height)
100 | 
101 |         current_region_x = col_idx * self.tile_width
102 |         current_region_w = self.tile_width
103 |         current_region_y = row_idx * self.tile_height
104 |         current_region_h = self.tile_height
105 | 
106 |         while col_idx < max_col_idx:
107 |             row_idx = 0  # reset to the first row for each new column
108 |             current_region_x = col_idx * self.tile_width
109 |             current_region_w = self.tile_width
110 |             if current_region_x + current_region_w > self.img_width:
111 |                 current_region_w = self.img_width - current_region_x
112 | 
113 |             while row_idx < max_row_idx:
114 |                 current_region_y = row_idx * self.tile_height
115 |                 current_region_h = self.tile_height
116 | 
117 |                 if current_region_y + current_region_h > self.img_height:
118 |                     current_region_h = self.img_height - current_region_y
119 | 
120 |                 url = self._generate_url(current_region_x, current_region_y, current_region_w, current_region_h)
121 |                 self.tile_info['tile_idxs'][(col_idx, row_idx)] = {'url': url}
122 | 
123 |                 row_idx += 1
124 | 
125 |             col_idx += 1
126 | 
127 | 
128 | 
129 | 
130 |         self.tile_info['num_tiles_w'] = max_col_idx
131 |         self.tile_info['num_tiles_h'] = max_row_idx
132 | 
133 |     def _download_tiles(self):
134 | 
135 |         for tile_idx in list(self.tile_info['tile_idxs'].keys()):
136 |             url = self.tile_info['tile_idxs'][tile_idx]['url']
137 | 
138 |             print(f"downloading for key {str(tile_idx)} - {url}")
139 | 
140 |             resp = requests.get(url)
141 |             #print(url)
142 |             img = np.asarray(bytearray(resp.content), dtype=np.uint8)
143 | 
144 |             if img.shape[0] == 0:  # empty image
145 |                 continue
146 | 
147 |             try:
148 |                 img = cv2.imdecode(img, cv2.IMREAD_COLOR)
149 |                 img_height, img_width, img_depth = img.shape
150 |                 print(img.shape)
151 | 
152 |             except:
153 |                 print('Tile might be empty, skipped', url)
154 |                 continue
155 | 
156 |             try:
157 |                 # Pad width and height to multiples of self.tile_width and self.tile_height
158 |                 d_height = self.tile_height - img_height
159 |                 d_width = self.tile_width - img_width
160 |                 top = 0
161 |                 bottom = d_height
162 |                 left = 0
163 |                 right = d_width
164 | 
165 |                 img = cv2.copyMakeBorder(img.copy(), top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
166 | 
167 |                 self.tile_info['tile_idxs'][tile_idx]['img'] = img
168 |             except:
169 |                 print('Error making border, skipped', url)
170 | 
171 | 
172 |     def _generate_img(self):
173 |         num_tiles_w = self.tile_info['num_tiles_w']
174 |         num_tiles_h = self.tile_info['num_tiles_h']
175 | 
176 |         enlarged_width = int(self.tile_width * num_tiles_w)
177 |         enlarged_height = int(self.tile_height * num_tiles_h)
178 |         print(f"ntw, nth: {num_tiles_w}, {num_tiles_h}")
179 |         print(f"ew, eh: {enlarged_width}, {enlarged_height}")
180 | 
181 | 
182 | 
print(f"{width}-{num_tiles_w}, {height}-{num_tiles_h}, {enlarged_width}, {enlarged_height}") 183 | # paste the original map to the enlarged map 184 | enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.uint8) 185 | 186 | # process tile by tile 187 | for idx in range(0, num_tiles_w): 188 | # paste the predicted probabilty maps to the output image 189 | for jdx in range(0, num_tiles_h): 190 | if 'img' not in self.tile_info['tile_idxs'][(idx, jdx)]: 191 | continue 192 | 193 | img = self.tile_info['tile_idxs'][(idx, jdx)]['img'] 194 | 195 | # print(f"img shape for ({idx}, {jdx}) - {img.shape}") 196 | enlarged_map[jdx * self.tile_width:(jdx + 1) * self.tile_width, idx * self.tile_height:(idx + 1) * self.tile_height, :] = img 197 | 198 | map_path = os.path.join(self.output_dir, self.img_filename) 199 | cv2.imwrite(map_path, enlarged_map) 200 | 201 | return map_path 202 | 203 | def _generate_url(self, x, y, w, h): 204 | 205 | bbox_str = ",".join([str(x), str(y), str(w), str(h)]) 206 | return_url = self.url_prefix + f"/{bbox_str}/{self.tile_size}/{self.rotation}/{self.quality}.{self.img_format}" 207 | #print(return_url) 208 | return return_url 209 | 210 | -------------------------------------------------------------------------------- /model/save_localheight_original_txt_fastzk.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import glob 3 | import json 4 | 5 | import cv2 6 | import math 7 | import numpy as np 8 | import os 9 | 10 | os.environ['KERAS_BACKEND'] = 'tensorflow' 11 | import sys 12 | import tensorflow as tf 13 | 14 | print(tf.__file__) 15 | print(tf.__version__) 16 | 17 | # gpus = tf.config.list_physical_devices('GPU') 18 | # if gpus: 19 | # # Restrict TensorFlow to only allocate 1GB of memory on the first GPU 20 | # try: 21 | # tf.config.experimental.set_virtual_device_configuration( 22 | # gpus[0], 23 | # [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)]) 24 | # logical_gpus = tf.config.experimental.list_logical_devices('GPU') 25 | # print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs") 26 | # except RuntimeError as e: 27 | # # Virtual devices must be set before GPUs have been initialized 28 | # print(e) 29 | 30 | 31 | os.environ['CUDA_VISIBLE_DEVICES'] = "" 32 | import keras 33 | from keras.models import load_model 34 | from keras.models import Sequential 35 | from keras.layers import Dense, Dropout, Flatten, Activation 36 | from keras.layers import Conv2D, MaxPooling2D 37 | from keras import backend as K 38 | from keras.callbacks import Callback 39 | from keras.layers import Lambda, Input, Dense, Concatenate, Conv2DTranspose 40 | from keras.layers import LeakyReLU, BatchNormalization, AveragePooling2D, Reshape 41 | from keras.layers import UpSampling2D, ZeroPadding2D 42 | from keras.losses import mse, binary_crossentropy 43 | from keras.models import Model 44 | from keras.layers import Lambda, TimeDistributed 45 | from keras import layers 46 | 47 | import numpy as np 48 | import cv2 49 | import argparse 50 | import glob 51 | 52 | 53 | from loss import weighted_categorical_crossentropy, mean_squared_error_mask 54 | from loss import mean_absolute_error_mask, mean_absolute_percentage_error_mask 55 | from mymodel import model_U_VGG_Centerline_Localheight 56 | 57 | 58 | map_images = glob.glob('./data/test_imgs/sample_input/101201496_h10w3.jpg') 59 | # map_images = glob.glob('./data/*.png') 60 | 61 | # print(globals()['map_images']) 62 | 63 | print("-----") 64 | 
 64 | print(map_images)
 65 | 
 66 | output_path = './data/test_imgs/sample_output/'
 67 | 
 68 | saved_weights = './data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5'
 69 | model = model_U_VGG_Centerline_Localheight()
 70 | model.load_weights(saved_weights)
 71 | 
 72 | if not os.path.isdir(output_path):
 73 |     os.makedirs(output_path)
 74 | 
 75 | 
 76 | shift_size = 512
 77 | 
 78 | for map_path in map_images:
 79 | 
 80 |     base_name = os.path.basename(map_path)
 81 | 
 82 |     txt_name = output_path + base_name[0:len(base_name) - 4] + '.txt'
 83 | 
 84 |     f = open(txt_name, 'w+')
 85 | 
 86 |     print(map_path)
 87 | 
 88 |     map_img = cv2.imread(map_path)
 89 | 
 90 |     width = map_img.shape[1]  # dimension2
 91 |     height = map_img.shape[0]  # dimension1
 92 | 
 93 |     in_map_img = map_img / 255.
 94 | 
 95 |     # pad the image to the size divisible by shift-size
 96 |     num_tiles_w = int(np.ceil(1. * width/shift_size))
 97 |     num_tiles_h = int(np.ceil(1. * height/shift_size))
 98 |     enlarged_width = int(shift_size * num_tiles_w)
 99 |     enlarged_height = int(shift_size * num_tiles_h)
100 |     print("tiling dimensions:")
101 |     print(f"{width}-{num_tiles_w}, {height}-{num_tiles_h}, {enlarged_width}, {enlarged_height}")
102 |     # paste the original map to the enlarged map
103 |     enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.float32)
104 |     enlarged_map[0:height, 0:width, :] = in_map_img
105 | 
106 |     # define the output probability maps
107 |     localheight_map_o = np.zeros((enlarged_height, enlarged_width, 1), np.float32)
108 |     center_map_o = np.zeros((enlarged_height, enlarged_width, 2), np.float32)
109 |     prob_map_o = np.zeros((enlarged_height, enlarged_width, 3), np.float32)
110 | 
111 |     # process tile by tile
112 |     for idx in range(0, num_tiles_h):
113 |         # pack several tiles in a batch and feed the batch to the model
114 |         test_batch = []
115 |         for jdx in range(0, num_tiles_w):
116 |             img_clip = enlarged_map[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :]
117 |             test_batch.append(img_clip)
118 |         test_batch = np.array(test_batch).astype(np.float32)
119 | 
120 |         # use the pretrained model to predict
121 |         batch_out = model.predict(test_batch)
122 | 
123 |         # get predictions
124 |         prob_map_batch = batch_out[0]
125 |         center_map_batch = batch_out[1]
126 |         localheight_map_batch = batch_out[2]
127 | 
128 |         # paste the predicted probability maps to the output image
129 |         for jdx in range(0, num_tiles_w):
130 |             localheight_map_o[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :] = localheight_map_batch[jdx]
131 |             center_map_o[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :] = center_map_batch[jdx]
132 |             prob_map_o[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :] = prob_map_batch[jdx]
133 | 
134 | 
135 |     # convert from the 0-1 range to 0-255
136 |     prob_map_o = (prob_map_o * 255).astype(np.uint8)
137 |     center_map_o = (center_map_o[:, :, 1] * 255).astype(np.uint8)
138 |     #localheight_map = (localheight_map_o * 255).astype(np.uint8)
139 | 
140 |     prob_map_o = prob_map_o[0:height, 0:width, :]
141 |     center_map_o = center_map_o[0:height, 0:width]
142 |     localheight_map_o = localheight_map_o[0:height, 0:width, :]
143 | 
144 | 
145 | 
146 |     num_c, connected_map = cv2.connectedComponents(center_map_o)
147 |     print('num_c:', num_c)
148 | 
149 |     # process component by component
150 |     for cur_cc_idx in range(1, num_c):  # index_0 is the background
151 | 
152 |         if cur_cc_idx % 100 == 0:
153 |             print('processed', str(cur_cc_idx))
154 | 
155 |         centerline_indices = np.where(connected_map == cur_cc_idx)
156 | 
157 |         centerPoints = []
158 |         for i, j in zip(centerline_indices[0], centerline_indices[1]):
159 |             if localheight_map_o[i, j, 0] > 0:
160 |                 centerPoints.append([i, j])
161 | 
162 |         if len(centerPoints) == 0:
163 |             continue
164 | 
165 |         mini, minj = np.min(centerPoints, axis=0)
166 |         maxi, maxj = np.max(centerPoints, axis=0)
167 | 
168 |         localheight_result_o = np.zeros((maxi-mini+100, maxj-minj+100, 3), np.uint8)
169 | 
170 |         for i, j in centerPoints:
171 |             cv2.circle(localheight_result_o, (j-minj+50, i-mini+50), int(localheight_map_o[i][j]*0.4), (0, 0, 255), -1)
172 | 
173 |         img_gray = cv2.cvtColor(localheight_result_o, cv2.COLOR_BGR2GRAY)
174 | 
175 |         contours, hierarchy = cv2.findContours(img_gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
176 | 
177 |         new_context = ''
178 | 
179 |         if len(contours) == 0:
180 |             continue
181 | 
182 |         for i in range(0, len(contours[0])):
183 |             if i < len(contours[0]) - 1:
184 |                 new_context = new_context + str(contours[0][i][0][0].item()+minj-50) + ',' + str(contours[0][i][0][1].item()+mini-50) + ','
185 |             else:
186 |                 new_context = new_context + str(contours[0][i][0][0].item()+minj-50) + ',' + str(contours[0][i][0][1].item()+mini-50)
187 | 
188 |         new_context = new_context + '\n'
189 | 
190 |         f.writelines(new_context)
191 | 
192 |     cv2.imwrite(output_path+'prob_' + base_name[0:len(base_name) - 4] + '.jpg', prob_map_o)
193 |     cv2.imwrite(output_path+'cent_' + base_name[0:len(base_name) - 4] + '.jpg', center_map_o)
194 |     cv2.imwrite(output_path+'localheight_map_' + base_name[0:len(base_name) - 4] + '.jpg', localheight_map_o)
195 | 
196 |     f.close()
197 | 
198 | 
199 |     #txt parse
200 |     with open(txt_name, 'r') as f:
201 |         data = f.readlines()
202 | 
203 |     polyList = []
204 | 
205 |     for line in data:
206 |         polyStr = line.split(',')
207 |         poly = []
208 |         for i in range(0, len(polyStr)):
209 |             if i % 2 == 0:
210 |                 poly.append([int(polyStr[i]), int(polyStr[i+1])])
211 | 
212 |         polyList.append(poly)
213 | 
214 | 
215 |     for i in range(0, len(polyList)):
216 |         polyPoints = np.array([polyList[i]], dtype=np.int32)
217 |         cv2.polylines(map_img, polyPoints, True, (0, 0, 255), 3)
218 | 
219 | 
220 |     cv2.imwrite(output_path+'parse_result_'+base_name[0:len(base_name) - 4] + '.jpg', map_img)
221 | 
222 | 
223 |     # Generate web annotations: https://www.w3.org/TR/annotation-model/
224 |     annotations = []
225 |     for polygon in polyList:
226 |         svg_polygon_coords = ' '.join([f"{x},{y}" for x, y in polygon])
227 |         annotation = {
228 |             "@context": "http://www.w3.org/ns/anno.jsonld",
229 |             "id": "",
230 |             "body": [{
231 |                 "type": "TextualBody",
232 |                 "purpose": "tagging",
233 |                 "value": "null"
234 |             }],
235 |             "target": {
236 |                 "selector": [{
237 |                     "type": "SvgSelector",
238 |                     "value": f"<svg><polygon points=\"{svg_polygon_coords}\" /></svg>"
239 |                 }]
240 |             }
241 |         }
242 |         annotations.append(annotation)
243 | 
244 |     with open(output_path+'web_annotations_'+base_name[0:len(base_name) - 4] + '.json', 'w') as f:
245 |         f.write(json.dumps(annotations, indent=2))
246 |     # print(f"{polyList}")
247 | 
248 | print('done processing')
249 | 
250 | 
251 | 

--------------------------------------------------------------------------------
/model/predict_annotations.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | import uuid
 4 | 
 5 | # set the backend and hide GPUs before keras/tensorflow are imported (mymodel imports keras)
 6 | os.environ['KERAS_BACKEND'] = 'tensorflow'
 7 | os.environ['CUDA_VISIBLE_DEVICES'] = ""
 8 | 
 9 | from wmts_handler import WMTSHandler
10 | from image_handler import ImageHandler
11 | from iiif_handler import IIIFHandler
12 | from mymodel import model_U_VGG_Centerline_Localheight
13 | 
14 | import cv2
15 | import numpy as np
16 | import json
17 | from shapely.geometry import Polygon
18 | 
19 | import sys
20 | import tensorflow as tf
21 | import time
22 | 
23 | print(tf.__file__)
24 | print(tf.__version__)
25 | 
26 | # basically copy-pasted from the original implementation in save_localheight_original_txt_fastzk.py
27 | def run_model(map_id, map_path, output_dir):
28 |     saved_weights = './data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5'
29 |     model = model_U_VGG_Centerline_Localheight()
30 |     model.load_weights(saved_weights)
31 | 
32 |     map_img = cv2.imread(map_path)
33 |     #print(map_path)
34 |     shift_size = 512
35 | 
36 |     base_name = os.path.basename(map_path)
37 | 
38 |     width = map_img.shape[1]  # dimension2
39 |     height = map_img.shape[0]  # dimension1
40 | 
41 |     in_map_img = map_img / 255.
42 | 
43 |     # pad the image to the size divisible by shift-size
44 |     num_tiles_w = int(np.ceil(1. * width / shift_size))
45 |     num_tiles_h = int(np.ceil(1. * height / shift_size))
46 |     enlarged_width = int(shift_size * num_tiles_w)
47 |     enlarged_height = int(shift_size * num_tiles_h)
48 |     # print(f"{width}-{num_tiles_w}, {height}-{num_tiles_h}, {enlarged_width}, {enlarged_height}")
49 |     # paste the original map to the enlarged map
50 |     enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.float32)
51 |     enlarged_map[0:height, 0:width, :] = in_map_img
52 | 
53 |     # define the output probability maps
54 |     localheight_map_o = np.zeros((enlarged_height, enlarged_width, 1), np.float32)
55 |     center_map_o = np.zeros((enlarged_height, enlarged_width, 2), np.float32)
56 |     prob_map_o = np.zeros((enlarged_height, enlarged_width, 3), np.float32)
57 | 
58 |     # process tile by tile
59 |     for idx in range(0, num_tiles_h):
60 |         # pack several tiles in a batch and feed the batch to the model
61 |         test_batch = []
62 |         for jdx in range(0, num_tiles_w):
63 |             img_clip = enlarged_map[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :]
64 |             test_batch.append(img_clip)
65 |         test_batch = np.array(test_batch).astype(np.float32)
66 | 
67 |         # use the pretrained model to predict
68 |         batch_out = model.predict(test_batch)
69 | 
70 |         # get predictions
71 |         prob_map_batch = batch_out[0]
72 |         center_map_batch = batch_out[1]
73 |         localheight_map_batch = batch_out[2]
74 | 
75 |         # paste the predicted probability maps to the output image
76 |         for jdx in range(0, num_tiles_w):
77 |             localheight_map_o[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :] = \
78 |                 localheight_map_batch[jdx]
79 |             center_map_o[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :] = \
80 |                 center_map_batch[jdx]
 81 |             prob_map_o[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :] = \
 82 |                 prob_map_batch[jdx]
 83 | 
 84 |     # convert from the 0-1 range to 0-255
 85 |     prob_map_o = (prob_map_o * 255).astype(np.uint8)
 86 |     center_map_o = (center_map_o[:, :, 1] * 255).astype(np.uint8)
 87 |     # localheight_map = (localheight_map_o * 255).astype(np.uint8)
 88 | 
 89 |     prob_map_o = prob_map_o[0:height, 0:width, :]
 90 |     center_map_o = center_map_o[0:height, 0:width]
 91 |     localheight_map_o = localheight_map_o[0:height, 0:width, :]
 92 | 
 93 |     num_c, connected_map = cv2.connectedComponents(center_map_o)
 94 |     print('num_c:', num_c)
 95 | 
 96 |     poly_list = []
 97 |     # process component by component
 98 |     for cur_cc_idx in range(1, num_c):  # index_0 is the background
 99 | 
100 |         if cur_cc_idx % 100 == 0:
101 |             print('processed', str(cur_cc_idx))
102 | 
103 |         centerline_indices = np.where(connected_map == cur_cc_idx)
104 | 
105 |         centerPoints = []
106 |         for i, j in zip(centerline_indices[0], centerline_indices[1]):
107 |             if localheight_map_o[i, j, 0] > 0:
108 |                 centerPoints.append([i, j])
109 | 
110 |         if len(centerPoints) == 0:
111 |             continue
112 | 
113 |         mini, minj = np.min(centerPoints, axis=0)
114 |         maxi, maxj = np.max(centerPoints, axis=0)
115 | 
116 |         localheight_result_o = np.zeros((maxi - mini + 100, maxj - minj + 100, 3), np.uint8)
117 | 
118 |         for i, j in centerPoints:
119 |             cv2.circle(localheight_result_o, (j - minj + 50, i - mini + 50), int(localheight_map_o[i][j] * 0.5),
120 |                        (0, 0, 255), -1)
121 | 
122 |         img_gray = cv2.cvtColor(localheight_result_o, cv2.COLOR_BGR2GRAY)
123 | 
124 |         contours, hierarchy = cv2.findContours(img_gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
125 | 
126 |         new_context = ''
127 | 
128 |         if len(contours) == 0:
129 |             continue
130 | 
131 |         for i in range(0, len(contours[0])):
132 |             if i < len(contours[0]) - 1:
133 |                 new_context = new_context + str(contours[0][i][0][0].item() + minj - 50) + ',' + str(
134 |                     contours[0][i][0][1].item() + mini - 50) + ','
135 |             else:
136 |                 new_context = new_context + str(contours[0][i][0][0].item() + minj - 50) + ',' + str(
137 |                     contours[0][i][0][1].item() + mini - 50)
138 | 
139 |         # new_context = new_context + '\n'
140 |         poly_str = new_context.split(',')
141 |         poly = []
142 |         for i in range(0, len(poly_str)):
143 |             if i % 2 == 0:
144 |                 poly.append((int(poly_str[i]), int(poly_str[i + 1])))
145 | 
146 |         try:
147 |             simple_poly = Polygon(poly).simplify(tolerance=5, preserve_topology=False).exterior.coords[:]  # tolerance is a hyper-parameter: a larger tolerance yields fewer points
148 |             #poly_list.append(poly)
149 |             poly_list.append(simple_poly)
150 |         except:
151 |             poly_list.append(poly)
152 | 
153 |     # cv2.imwrite(output_path + 'prob_' + base_name[0:len(base_name) - 4] + '.jpg', prob_map_o)
154 |     # cv2.imwrite(output_path + 'cent_' + base_name[0:len(base_name) - 4] + '.jpg', center_map_o)
155 |     # cv2.imwrite(output_path + 'localheight_map_' + base_name[0:len(base_name) - 4] + '.jpg', localheight_map_o)
156 | 
157 | 
158 |     for i in range(0, len(poly_list)):
159 |         poly_points = np.array([poly_list[i]], dtype=np.int32)
160 |         cv2.polylines(map_img, poly_points, True, (0, 0, 255), 3)
161 | 
162 |     predictions_file = os.path.join(output_dir, map_id + '_predictions.jpg')
163 |     cv2.imwrite(predictions_file, map_img)
164 | 
165 | 
166 |     return poly_list
167 | 
168 | def write_annotation(map_id, output_dir, poly_list, handler=None):
169 | 
170 | 
171 |     if handler is not None:
172 |         # perform this operation for WMTS tiles only:
173 |         # based on the tile info, convert from the image coordinate system to EPSG:4326
174 |         # (assumes that the tile size is 256x256)
175 | 
176 |         tile_info = handler.tile_info
177 | 
178 |         min_tile_x = tile_info['min_x']
179 |         min_tile_y = tile_info['min_y']
180 | 
181 |         latlon_poly_list = []
182 |         for polygon in poly_list:
183 | 
184 |             if np.array(polygon).shape[0] == 0:
185 |                 continue
186 | 
187 |             # process each polygon
188 |             poly_x_list, poly_y_list = np.array(polygon)[:, 0], np.array(polygon)[:, 1]
189 | 
190 |             # tile index of each point within the stitched map, i.e. its offset from min_tile_x, min_tile_y
191 |             temp_tile_x_list, temp_tile_y_list = np.floor(poly_x_list / 256.), np.floor(poly_y_list / 256.)
192 | 
193 |             # compute the global index of the tile that each polygon point lies in
194 |             tile_x_list, tile_y_list = min_tile_x + temp_tile_x_list, min_tile_y + temp_tile_y_list
195 | 
196 |             # get each polygon point's fractional position within its tile
197 |             remainder_x_list, remainder_y_list = poly_x_list / 256. - temp_tile_x_list, poly_y_list / 256. - temp_tile_y_list
198 | 
199 |             # final fractional (global) tile coordinates
200 |             tile_x_list, tile_y_list = tile_x_list + remainder_x_list, tile_y_list + remainder_y_list
201 | 
202 |             # convert to EPSG:4326
203 |             lat_list, lon_list = handler._tile2latlon_list(tile_x_list, tile_y_list)
204 | 
205 |             # _tile2latlon_list returns (lat, lon), but GeoJSON/SVG order is (x=lon, y=lat), so flip
206 |             #latlon_poly = [[x, y] for x, y in zip(lon_list, lat_list)]
207 |             latlon_poly = [["{:.6f}".format(x), "{:.6f}".format(y)] for x, y in zip(lon_list, lat_list)]
208 | 
209 | 
210 |             latlon_poly_list.append(latlon_poly)
211 | 
212 |         poly_list = latlon_poly_list
213 |         # reassign latlon_poly_list to poly_list for consistency
214 | 
215 | 
216 |     # Generate web annotations: https://www.w3.org/TR/annotation-model/
217 |     annotations = []
218 |     for polygon in poly_list:
219 |         svg_polygon_coords = ' '.join([f"{x},{y}" for x, y in polygon])
220 |         annotation = {
221 |             "@context": "http://www.w3.org/ns/anno.jsonld",
222 |             "id": "",
223 |             #"body": [{
224 |             #    "type": "TextualBody",
225 |             #    "purpose": "tagging",
226 |             #    "value": "null"
227 |             #}],
228 |             "target": {
229 |                 "selector": [{
230 |                     "type": "SvgSelector",
231 |                     "value": f"<svg><polygon points=\"{svg_polygon_coords}\" /></svg>"
232 |                 }]
233 |             }
234 |         }
235 |         annotations.append(annotation)
236 | 
237 |     annotation_file = os.path.join(output_dir, map_id + '_annotations.json')
238 |     with open(annotation_file, 'w') as f:
239 |         f.write(json.dumps(annotations, indent=2))
240 | 
241 |     return annotation_file
242 |     # print(f"{polyList}")
243 | 
244 | 
245 | if __name__ == "__main__":
246 |     parser = argparse.ArgumentParser()
247 | 
248 |     arg_parser_common = argparse.ArgumentParser(add_help=False)
249 |     arg_parser_common.add_argument('--dst', required=True, type=str, help='path to the output directory')
250 |     arg_parser_common.add_argument('--filename', required=False, type=str, help='output filename prefix')
251 |     arg_parser_common.add_argument('--coord', default='img_coord', required=False, type=str, choices=['img_coord', 'epsg4326'], help='return annotations in image coordinates or EPSG:4326')
252 | 
253 |     # parser.add_argument("input_type", choices=["wmts", "iiif", "tiff", "jpeg", "png"])
254 |     subparsers = parser.add_subparsers(dest='subcommand')
255 | 
256 |     arg_parser_wmts = subparsers.add_parser('wmts', parents=[arg_parser_common],
257 |                                             help='generate annotations for wmts input type')
258 |     arg_parser_wmts.add_argument('--url', required=True, type=str, help='getCapabilities url')
259 |     arg_parser_wmts.add_argument('--boundary', required=True, type=str, help='desired region boundary in GeoJSON')
260 |     arg_parser_wmts.add_argument('--zoom', default=14, type=int, help='desired zoom level')
261 | 
262 |     arg_parser_iiif = subparsers.add_parser('iiif', parents=[arg_parser_common],
263 |                                             help='generate annotations for iiif input type')
264 |     arg_parser_iiif.add_argument('--url', required=True, type=str, help='IIIF manifest url')
265 | 
266 |     arg_parser_raw_input = subparsers.add_parser('file', parents=[arg_parser_common])
267 |     arg_parser_raw_input.add_argument('--src', required=True, type=str, help='path to input image')
268 | 
269 |     args = parser.parse_args()
270 | 
271 |     map_path = None
272 |     output_dir = args.dst
273 | 
274 |     if args.filename is not None:
275 |         img_id = args.filename
276 |     else:
277 |         img_id = str(uuid.uuid4())
278 | 
279 |     if not os.path.isdir(output_dir):
280 |         os.makedirs(output_dir)
281 | 
282 | 
283 |     if args.coord == 'epsg4326':
284 |         assert args.subcommand == 'wmts'
285 | 
286 | 
287 |     if args.subcommand == 'wmts':
288 |         '''
289 |         time docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --runtime=nvidia --gpus all --workdir=/map-kurator map-kurator python model/predict_annotations.py wmts 
--url='https://wmts.maptiler.com/aHR0cDovL3dtdHMubWFwdGlsZXIuY29tL2FIUjBjSE02THk5dFlYQnpaWEpwWlhNdGRHbHNaWE5sZEhNdWN6TXVZVzFoZW05dVlYZHpMbU52YlM4eU5WOXBibU5vTDNsdmNtdHphR2x5WlM5dFpYUmhaR0YwWVM1cWMyOXUvanNvbg/wmts' --boundary='{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-1.1248,53.9711],[-1.0592,53.9711],[-1.0592,53.9569],[-1.1248,53.9569],[-1.1248,53.9711]]]}}' --zoom=16 --dst=data/test_imgs/sample_output/ 290 | ''' 291 | 292 | wmts_handler = WMTSHandler(url=args.url, bounds=args.boundary, zoom=args.zoom, output_dir=output_dir, img_filename=img_id + '_stitched.jpg') 293 | map_path = wmts_handler.process_wmts() 294 | 295 | poly_list = run_model(img_id, map_path, output_dir) 296 | if args.coord == 'img_coord': 297 | annotation_file = write_annotation(img_id, output_dir, poly_list) 298 | else: 299 | annotation_file = write_annotation(img_id, output_dir, poly_list, handler = wmts_handler) 300 | 301 | if args.subcommand == 'iiif': 302 | ''' 303 | time docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --runtime=nvidia --gpus all --workdir=/map-kurator map-kurator python model/predict_annotations.py iiif --url='https://map-view.nls.uk/iiif/2/12563%2F125635459/info.json' --dst=data/test_imgs/sample_output/ 304 | ''' 305 | start_download = time.time() 306 | iiif_handler = IIIFHandler(args.url, output_dir, img_filename=img_id + '_stitched.jpg') 307 | map_path = iiif_handler.process_url() 308 | 309 | end_download = time.time() 310 | 311 | poly_list = run_model(img_id, map_path, output_dir) 312 | annotation_file = write_annotation(img_id, output_dir, poly_list) 313 | 314 | end_detection = time.time() 315 | 316 | print('download time: ', end_download - start_download) 317 | print('detection time: ', end_detection - end_download) 318 | 319 | 320 | if args.subcommand == 'file': 321 | ''' 322 | time docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --runtime=nvidia --gpus all --workdir=/map-kurator map-kurator python model/predict_annotations.py file --src=data/test_imgs/sample_input/101201496_h10w3.jpg --dst=data/test_imgs/sample_output/ 323 | ''' 324 | map_path = args.src 325 | 326 | poly_list = run_model(img_id, map_path, output_dir) 327 | annotation_file = write_annotation(img_id, output_dir, poly_list) 328 | 329 | 330 | 331 | 332 | print("done") 333 | print(annotation_file) 334 | -------------------------------------------------------------------------------- /model/mymodel.py: -------------------------------------------------------------------------------- 1 | import keras 2 | from keras.models import Sequential 3 | from keras.layers import Dense, Dropout, Flatten , Activation 4 | from keras.layers import Conv2D, MaxPooling2D 5 | from keras import backend as K 6 | from keras.callbacks import Callback 7 | from keras.layers import Lambda, Input, Dense, Concatenate ,Conv2DTranspose 8 | from keras.layers import LeakyReLU,BatchNormalization,AveragePooling2D,Reshape 9 | from keras.layers import UpSampling2D,ZeroPadding2D 10 | from keras.losses import mse, binary_crossentropy 11 | from keras.models import Model 12 | from keras.layers import Lambda,TimeDistributed 13 | from keras import layers 14 | 15 | def UNET(pretrained_weights = None,input_size = (256,256,3)): 16 | inputs = Input(input_size) 17 | conv1 = Conv2D(16, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv1-1')(inputs) 18 | conv1 = Conv2D(16, 3, activation = 'relu', padding = 'same', 
kernel_initializer = 'he_normal', name = 'conv1-2')(conv1) 19 | pool1 = MaxPooling2D(pool_size=(2, 2))(conv1) 20 | 21 | conv2 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv2-1')(pool1) 22 | conv2 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv2-2')(conv2) 23 | pool2 = MaxPooling2D(pool_size=(2, 2))(conv2) 24 | 25 | conv3 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv3-1')(pool2) 26 | conv3 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv3-2')(conv3) 27 | pool3 = MaxPooling2D(pool_size=(2, 2))(conv3) 28 | 29 | conv4 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv4-1')(pool3) 30 | conv4 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv4-2')(conv4) 31 | drop4 = Dropout(0.5)(conv4) 32 | pool4 = MaxPooling2D(pool_size=(2, 2))(drop4) 33 | 34 | conv5 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv5-1')(pool4) 35 | conv5 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv5-2')(conv5) 36 | drop5 = Dropout(0.5)(conv5) 37 | pool5 = MaxPooling2D(pool_size=(2, 2))(drop5) 38 | 39 | conv6 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6-1')(pool5) 40 | conv6 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6-2')(conv6) 41 | drop6 = Dropout(0.5)(conv6) 42 | 43 | #conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7-1')(pool6) 44 | #conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7-2')(conv7) 45 | #drop7 = Dropout(0.5)(conv7) 46 | 47 | #////////////////////////////////////////////////////////// 48 | 49 | up6 = Conv2D(512, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6u-0')(UpSampling2D(size = (2,2))(conv6)) 50 | #merge6 = concatenate([drop4,up6], axis = 3) 51 | conv6u = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6u-1')(up6) 52 | conv6u = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6u-2')(conv6u) 53 | 54 | up7 = Conv2D(256, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7u-0')(UpSampling2D(size = (2,2))(conv6u)) 55 | #merge7 = concatenate([conv3,up7], axis = 3) 56 | conv7u = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7u-1')(up7) 57 | conv7u = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7u-2')(conv7u) 58 | 59 | up8 = Conv2D(128, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv8u-0')(UpSampling2D(size = (2,2))(conv7u)) 60 | #merge8 = concatenate([conv2,up8], axis = 3) 61 | conv8u = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv8u-1')(up8) 62 | conv8u = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv8u-2')(conv8u) 63 | 64 | up9 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', 
name = 'conv9u-0')(UpSampling2D(size = (2,2))(conv8u)) 65 | #merge9 = concatenate([conv1,up9], axis = 3) 66 | conv9u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv9u-1')(up9) 67 | conv9u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv9u-2')(conv9u) 68 | 69 | 70 | up10 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-0')(UpSampling2D(size = (2,2))(conv9u)) 71 | #merge9 = concatenate([conv1,up9], axis = 3) 72 | conv10u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-1')(up10) 73 | conv10u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-2')(conv10u) 74 | conv10u = Conv2D(3, 3, activation = 'sigmoid', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-3')(conv10u) 75 | 76 | model = Model(inputs, conv10u) 77 | 78 | #model.compile(optimizer = Adam(lr = 1e-4), loss = 'binary_crossentropy', metrics = ['accuracy']) 79 | 80 | if(pretrained_weights): 81 | model.load_weights(pretrained_weights) 82 | 83 | return model 84 | 85 | def model_U_VGG(): 86 | #input_shape = (720, 1280, 3) 87 | #input_shape = (512,512,3) 88 | input_shape = (None,None,3) 89 | inputs = Input(shape=input_shape, name='input') 90 | 91 | 92 | # Block 1 93 | x0 = layers.Conv2D(64, (3, 3), 94 | activation='relu', 95 | padding='same', 96 | name='block1_conv1')(inputs) 97 | x0 = layers.Conv2D(64, (3, 3), 98 | activation='relu', 99 | padding='same', 100 | name='block1_conv2')(x0) 101 | x0 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x0) 102 | 103 | # Block 2 104 | x1 = layers.Conv2D(128, (3, 3), 105 | activation='relu', 106 | padding='same', 107 | name='block2_conv1')(x0) 108 | x1 = layers.Conv2D(128, (3, 3), 109 | activation='relu', 110 | padding='same', 111 | name='block2_conv2')(x1) 112 | x1 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x1) 113 | 114 | # Block 3 115 | x2 = layers.Conv2D(256, (3, 3), 116 | activation='relu', 117 | padding='same', 118 | name='block3_conv1')(x1) 119 | x2 = layers.Conv2D(256, (3, 3), 120 | activation='relu', 121 | padding='same', 122 | name='block3_conv2')(x2) 123 | x2_take = layers.Conv2D(256, (3, 3), 124 | activation='relu', 125 | padding='same', 126 | name='block3_conv3')(x2) 127 | x2 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x2_take) 128 | 129 | # Block 4 130 | x3 = layers.Conv2D(512, (3, 3), 131 | activation='relu', 132 | padding='same', 133 | name='block4_conv1')(x2) 134 | x3 = layers.Conv2D(512, (3, 3), 135 | activation='relu', 136 | padding='same', 137 | name='block4_conv2')(x3) 138 | x3_take = layers.Conv2D(512, (3, 3), 139 | activation='relu', 140 | padding='same', 141 | name='block4_conv3')(x3) 142 | x3 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x3_take) 143 | 144 | # Block 5 145 | x4 = layers.Conv2D(512, (3, 3), 146 | activation='relu', 147 | padding='same', 148 | name='block5_conv1')(x3) 149 | x4 = layers.Conv2D(512, (3, 3), 150 | activation='relu', 151 | padding='same', 152 | name='block5_conv2')(x4) 153 | x4_take = layers.Conv2D(512, (3, 3), 154 | activation='relu', 155 | padding='same', 156 | name='block5_conv3')(x4) 157 | x4 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x4_take) 158 | 159 | #f1 = UpSampling2D((2,2))(x4) 160 | #if TASK_4: 161 | # f1 = ZeroPadding2D(padding=((1,0), (0,0)), name 
def model_U_VGG():
    #input_shape = (720, 1280, 3)
    #input_shape = (512,512,3)
    input_shape = (None, None, 3)
    inputs = Input(shape=input_shape, name='input')

    # Block 1
    x0 = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(inputs)
    x0 = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x0)
    x0 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x0)

    # Block 2
    x1 = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x0)
    x1 = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x1)
    x1 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x1)

    # Block 3
    x2 = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x1)
    x2 = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x2)
    x2_take = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x2)
    x2 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x2_take)

    # Block 4
    x3 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x2)
    x3 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x3)
    x3_take = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x3)
    x3 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x3_take)

    # Block 5
    x4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x3)
    x4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x4)
    x4_take = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x4)
    x4 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x4_take)

    #f1 = UpSampling2D((2,2))(x4)
    #if TASK_4:
    #    f1 = ZeroPadding2D(padding=((1,0), (0,0)), name = 'f1')(f1)
    f1 = x4_take
    f2 = x3
    h1 = Concatenate()([f2, f1])
    h1 = layers.Conv2D(128, (1, 1), activation='relu', padding='same', name='up1_1')(h1)
    h1 = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='up1_2')(h1)

    h2 = Concatenate()([x2, UpSampling2D((2, 2))(h1)])
    h2 = layers.Conv2D(64, (1, 1), activation='relu', padding='same', name='up2_1')(h2)
    h2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='up2_2')(h2)

    h3 = Concatenate()([x1, UpSampling2D((2, 2))(h2)])
    h3 = layers.Conv2D(32, (1, 1), activation='relu', padding='same', name='up3_1')(h3)
    h3 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='up3_2')(h3)

    h4 = Concatenate()([x0, UpSampling2D((2, 2))(h3)])
    h4 = layers.Conv2D(32, (1, 1), activation='relu', padding='same', name='up4_1')(h4)
    h4 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='up4_2')(h4)

    h5 = Concatenate()([inputs, UpSampling2D((2, 2))(h4)])
    h5 = layers.Conv2D(16, (1, 1), activation='relu', padding='same', name='up5_1')(h5)

    ################## output for TEXT/NON-TEXT ############
    o1 = layers.Conv2D(3, (3, 3), activation='softmax', padding='same', name='up5_2')(h5)

    ################ Regression ###########################
    b1 = Concatenate(name='agg_feat-1')([x4_take, h1])  # block5_conv3, up1_2 # 32,32,640
    b1 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same', activation='relu', name='agg_feat-2')(b1)  # 64,64,128

    #------ xy regression -------
    o2 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-1-1')(b1)  # 128,128,64
    o2 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-1-2')(o2)  # 128,128,32
    o2 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-1-3')(o2)  # 256,256,16
    o2 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-1-4')(o2)  # 256,256,8
    o2 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-1-5')(o2)  # 512,512,4
    o2 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same', activation='tanh', name='regress-1-6')(o2)  # 512,512,2

    #------ wh regression -------
    o4 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-3-1')(b1)  # 128,128,64
    o4 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-3-2')(o4)  # 128,128,32
    o4 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-3-3')(o4)  # 256,256,16
    o4 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-3-4')(o4)  # 256,256,8
    o4 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-3-5')(o4)  # 512,512,4
    o4 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same', activation='sigmoid', name='regress-3-6')(o4)  # 512,512,2

    #------ sin/cos regression -------
    b2 = Concatenate()([x3_take, b1])  # block4_conv3, agg_feat-2 # 64,64,640
    b2 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-2-1')(b2)  # 128,128,128
    o3 = Concatenate()([x2_take, b2])  # block3_conv3, regress-2-1 # 128,128,(256+128)
    o3 = layers.Conv2DTranspose(32, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-2-2')(o3)  # 256,256,32
    o3 = layers.Conv2DTranspose(2, (3, 3), strides=(2, 2), padding='same', activation='tanh', name='regress-2-3')(o3)  # 512,512,2

    # o1: text/non-text, o2: x,y offsets, o3: sin,cos, o4: box width,height
    model = Model(inputs, [o1, o2, o3, o4], name='U-VGG-model')

    return model
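
# A minimal usage sketch (assumptions: numpy is available and a TF/Keras
# backend is configured, as elsewhere in this module; the `_demo_*` name is
# hypothetical). Because input_shape is (None, None, 3), any tile whose height
# and width are divisible by 32 (five 2x2 poolings) keeps the skip-connection
# shapes aligned; 512x512 matches the shape comments above.
def _demo_model_U_VGG():
    import numpy as np
    model = model_U_VGG()
    dummy = np.zeros((1, 512, 512, 3), dtype=np.float32)
    o1, o2, o3, o4 = model.predict(dummy)
    # o1 (1,512,512,3): text/non-text softmax map
    # o2 (1,512,512,2): x,y offset regression (tanh)
    # o3 (1,512,512,2): sin,cos angle regression (tanh)
    # o4 (1,512,512,2): box width,height regression (sigmoid)
    return o1.shape, o2.shape, o3.shape, o4.shape
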
def model_U_VGG_Centerline():
    #input_shape = (720, 1280, 3)
    #input_shape = (512,512,3)
    input_shape = (None, None, 3)
    inputs = Input(shape=input_shape, name='input')

    # Block 1
    x0 = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(inputs)
    x0 = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x0)
    x0 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x0)

    # Block 2
    x1 = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x0)
    x1 = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x1)
    x1 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x1)

    # Block 3
    x2 = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x1)
    x2 = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x2)
    x2_take = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x2)
    x2 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x2_take)

    # Block 4
    x3 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x2)
    x3 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x3)
    x3_take = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x3)
    x3 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x3_take)

    # Block 5
    x4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x3)
    x4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x4)
    x4_take = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x4)
    x4 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x4_take)

    #f1 = UpSampling2D((2,2))(x4)
    #if TASK_4:
    #    f1 = ZeroPadding2D(padding=((1,0), (0,0)), name = 'f1')(f1)
    f1 = x4_take
    f2 = x3
    h1 = Concatenate()([f2, f1])
    h1 = layers.Conv2D(128, (1, 1), activation='relu', padding='same', name='up1_1')(h1)
    h1 = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='up1_2')(h1)

    h2 = Concatenate()([x2, UpSampling2D((2, 2))(h1)])
    h2 = layers.Conv2D(64, (1, 1), activation='relu', padding='same', name='up2_1')(h2)
    h2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='up2_2')(h2)

    h3 = Concatenate()([x1, UpSampling2D((2, 2))(h2)])
    h3 = layers.Conv2D(32, (1, 1), activation='relu', padding='same', name='up3_1')(h3)
    h3 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='up3_2')(h3)

    h4_take = Concatenate()([x0, UpSampling2D((2, 2))(h3)])

    h4 = layers.Conv2D(32, (1, 1), activation='relu', padding='same', name='up4_1')(h4_take)
    h4 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='up4_2')(h4)

    h5 = Concatenate()([inputs, UpSampling2D((2, 2))(h4)])
    h5 = layers.Conv2D(16, (1, 1), activation='relu', padding='same', name='up5_1')(h5)

    ################## output for TEXT/NON-TEXT ############
    o1 = layers.Conv2D(3, (3, 3), activation='softmax', padding='same', name='up5_2')(h5)

    ################## output for centerline / other ###########
    h41 = layers.Conv2D(32, (1, 1), activation='relu', padding='same', name='up41_1')(h4_take)
    h41 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='up41_2')(h41)

    h51 = Concatenate()([inputs, UpSampling2D((2, 2))(h41)])
    h51 = layers.Conv2D(16, (1, 1), activation='relu', padding='same', name='up51_1')(h51)

    o11 = layers.Conv2D(2, (3, 3), activation='softmax', padding='same', name='up51_2')(h51)

    ################ Regression ###########################
    b1 = Concatenate(name='agg_feat-1')([x4_take, h1])  # block5_conv3, up1_2 # 32,32,640
    b1 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same', activation='relu', name='agg_feat-2')(b1)  # 64,64,128

    #------ xy regression -------
    o2 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-1-1')(b1)  # 128,128,64
    o2 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-1-2')(o2)  # 128,128,32
    o2 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-1-3')(o2)  # 256,256,16
    o2 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-1-4')(o2)  # 256,256,8
    o2 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-1-5')(o2)  # 512,512,4
    o2 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same', activation='tanh', name='regress-1-6')(o2)  # 512,512,2

    #------ wh regression -------
    o4 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-3-1')(b1)  # 128,128,64
    o4 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-3-2')(o4)  # 128,128,32
    o4 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-3-3')(o4)  # 256,256,16
    o4 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-3-4')(o4)  # 256,256,8
    o4 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-3-5')(o4)  # 512,512,4
    o4 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same', activation='sigmoid', name='regress-3-6')(o4)  # 512,512,2

    #------ sin/cos regression -------
    b2 = Concatenate()([x3_take, b1])  # block4_conv3, agg_feat-2 # 64,64,640
    b2 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-2-1')(b2)  # 128,128,128
    o3 = Concatenate()([x2_take, b2])  # block3_conv3, regress-2-1 # 128,128,(256+128)
    o3 = layers.Conv2DTranspose(32, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-2-2')(o3)  # 256,256,32
    o3 = layers.Conv2DTranspose(2, (3, 3), strides=(2, 2), padding='same', activation='tanh', name='regress-2-3')(o3)  # 512,512,2

    # o1: t/nt, o11: centerline, o2: x,y, o3: sin,cos, o4: bounding box width,height
    model = Model(inputs, [o1, o11, o2, o3, o4], name='U-VGG-model')

    return model
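
# Usage sketch for the centerline variant (same assumptions as the sketch
# above, and the `_demo_*` name is again hypothetical): it adds a two-channel
# centerline/background softmax head o11 alongside the four outputs of
# model_U_VGG.
def _demo_model_U_VGG_Centerline():
    import numpy as np
    model = model_U_VGG_Centerline()
    o1, o11, o2, o3, o4 = model.predict(np.zeros((1, 512, 512, 3), dtype=np.float32))
    # o1 (1,512,512,3) text/non-text; o11 (1,512,512,2) centerline/other;
    # o2, o3, o4 as in model_U_VGG.
    return o11.shape
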
def model_U_VGG_Centerline_Localheight():
    # input_shape = (720, 1280, 3)
    # input_shape = (512,512,3)
    input_shape = (None, None, 3)
    inputs = Input(shape=input_shape, name='input')

    # Block 1
    x0 = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(inputs)
    x0 = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x0)
    x0 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x0)

    # Block 2
    x1 = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x0)
    x1 = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x1)
    x1 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x1)

    # Block 3
    x2 = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x1)
    x2 = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x2)
    x2_take = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x2)
    x2 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x2_take)

    # Block 4
    x3 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x2)
    x3 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x3)
    x3_take = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x3)
    x3 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x3_take)

    # Block 5
    x4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x3)
    x4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x4)
    x4_take = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x4)
    x4 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x4_take)

    # f1 = UpSampling2D((2,2))(x4)
    # if TASK_4:
    #     f1 = ZeroPadding2D(padding=((1,0), (0,0)), name = 'f1')(f1)
    f1 = x4_take
    f2 = x3
    h1 = Concatenate()([f2, f1])
    h1 = layers.Conv2D(128, (1, 1), activation='relu', padding='same', name='up1_1')(h1)
    h1 = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='up1_2')(h1)

    h2 = Concatenate()([x2, UpSampling2D((2, 2))(h1)])
    h2 = layers.Conv2D(64, (1, 1), activation='relu', padding='same', name='up2_1')(h2)
    h2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='up2_2')(h2)

    h3 = Concatenate()([x1, UpSampling2D((2, 2))(h2)])
    h3 = layers.Conv2D(32, (1, 1), activation='relu', padding='same', name='up3_1')(h3)
    h3 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='up3_2')(h3)

    h4_take = Concatenate()([x0, UpSampling2D((2, 2))(h3)])

    h4 = layers.Conv2D(32, (1, 1), activation='relu', padding='same', name='up4_1')(h4_take)
    h4 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='up4_2')(h4)

    h5 = Concatenate()([inputs, UpSampling2D((2, 2))(h4)])
    h5 = layers.Conv2D(16, (1, 1), activation='relu', padding='same', name='up5_1')(h5)

    ################## output for TEXT/NON-TEXT ############
    o1 = layers.Conv2D(3, (3, 3), activation='softmax', padding='same', name='up5_2')(h5)

    ################## output for centerline / other ###########
    h41 = layers.Conv2D(32, (1, 1), activation='relu', padding='same', name='up41_1')(h4_take)
    h41 = layers.Conv2D(32, (3, 3), activation='relu', padding='same', name='up41_2')(h41)

    h51 = Concatenate()([inputs, UpSampling2D((2, 2))(h41)])
    h51 = layers.Conv2D(16, (1, 1), activation='relu', padding='same', name='up51_1')(h51)

    o11 = layers.Conv2D(2, (3, 3), activation='softmax', padding='same', name='up51_2')(h51)

    ################ Regression ###########################
    b1 = Concatenate(name='agg_feat-1')([x4_take, h1])  # block5_conv3, up1_2 # 32,32,640
    b1 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same', activation='relu', name='agg_feat-2')(b1)  # 64,64,128

    # ------ xy regression -------
    o2 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-1-1')(b1)  # 128,128,64
    o2 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-1-2')(o2)  # 128,128,32
    o2 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-1-3')(o2)  # 256,256,16
    o2 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-1-4')(o2)  # 256,256,8
    o2 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-1-5')(o2)  # 512,512,4
    o2 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same', activation='tanh', name='regress-1-6')(o2)  # 512,512,2

    # ------ wh regression -------
    o4 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-3-1')(b1)  # 128,128,64
    o4 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-3-2')(o4)  # 128,128,32
    o4 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-3-3')(o4)  # 256,256,16
    o4 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-3-4')(o4)  # 256,256,8
    o4 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-3-5')(o4)  # 512,512,4
    o4 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same', activation='sigmoid', name='regress-3-6')(o4)  # 512,512,2

    # ------ sin/cos regression -------
    b2 = Concatenate()([x3_take, b1])  # block4_conv3, agg_feat-2 # 64,64,640
    b2 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-2-1')(b2)  # 128,128,128
    o3 = Concatenate()([x2_take, b2])  # block3_conv3, regress-2-1 # 128,128,(256+128)
    o3 = layers.Conv2DTranspose(32, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-2-2')(o3)  # 256,256,32
    o3 = layers.Conv2DTranspose(2, (3, 3), strides=(2, 2), padding='same', activation='tanh', name='regress-2-3')(o3)  # 512,512,2

    # ------ local height regression ------
    o5 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-4-1')(b1)  # 128,128,64
    o5 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-4-2')(o5)  # 128,128,32
    o5 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-4-3')(o5)  # 256,256,16
    o5 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-4-4')(o5)  # 256,256,8
    o5 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same', activation='relu', name='regress-4-5')(o5)  # 512,512,4
    o5 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-4-6')(o5)  # 512,512,2
    o5 = layers.Conv2DTranspose(1, (3, 3), strides=(1, 1), padding='same', activation='relu', name='regress-4-7')(o5)  # 512,512,1

    # o1: t/nt, o11: centerline, o2: x,y, o3: sin,cos, o4: bounding box width,height, o5: localheight
    # model = Model(inputs, [o1,o11, o2,o3,o4], name = 'U-VGG-model')
    model = Model(inputs, [o1, o11, o5], name='U-VGG-model-Localheight')

    return model
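
# Usage sketch for the local-height variant (same assumptions as the sketches
# above; the `_demo_*` name is hypothetical). Although the xy/sincos/wh heads
# are constructed, only [o1, o11, o5] are wired into the returned Model, so
# predict() yields three arrays.
def _demo_model_U_VGG_Centerline_Localheight():
    import numpy as np
    model = model_U_VGG_Centerline_Localheight()
    o1, o11, o5 = model.predict(np.zeros((1, 512, 512, 3), dtype=np.float32))
    # o1 (1,512,512,3) text/non-text; o11 (1,512,512,2) centerline/other;
    # o5 (1,512,512,1) per-pixel local text height (relu, unbounded above)
    return o5.shape
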
--------------------------------------------------------------------------------